

```{r Library}

# Attach packages with library(): it stops immediately when a package is
# missing, whereas require() only warns and returns FALSE, letting the script
# run on and fail later with a less helpful error.
library(tidyverse)
library(haven)
library(janitor)
library(tidycensus)
library(survey)
library(srvyr)


# Negated %in%: TRUE for elements of the left operand that are absent from
# the right operand.
`%nin%` <- Negate(`%in%`)


## Addressing some code where there is only 1 PSU for MRP
## (see ?survey::surveyoptions for what these two settings control)
options(survey.adjust.domain.lonely = TRUE)
options(survey.lonely.psu = "adjust")


```





```{r Data load}


## Survey data

# Respondent-level file holding every collected survey.
surveys_all <- read_csv(file = "collected_surveys_all.csv")


# One row per Year x Survey with the number of respondents in `count`.
survey_list <- count(surveys_all, Year, Survey, name = "count")


## Census data (PUMS-level estimates)
## Look to Bernard Fraga code for how to get PUMS population estimates

## Loading each individually to ensure the data is structured properly before merging all together

# Input files in stacking order: the 1990 and 2000 Census files first, then
# one PUMS file for each year 2001-2023.
census_paths <- file.path(
  "PUMS Population Proportions",
  paste0(
    c("Census_1990", "Census_2000", paste0("PUMS_", 2001:2023)),
    "_Counts2.csv"
  )
)

# Read every file, then stack with rbind(): unlike a column-filling bind it
# stops when a file's columns do not line up with the others, which keeps the
# "is each file structured properly" check.
census <- do.call(
  rbind,
  lapply(census_paths, function(path) read_csv(file = path))
)

# Leave only `census` behind in the workspace.
rm(census_paths)

# Persist the stacked table.
write_csv(census, file = "Data/census_all.csv")

```



```{r Checking and cleaning Census data}

#### State

# Row tally per raw ST value (diag_df is scratch output for interactive checks).
diag_df <- count(census, ST, name = "count")

## ST is labelled either as the 2-digit code or as 5 digits
## (the 2-digit code followed by 000)

## Divide the long form by 1000 so every row carries the short code
census$ST <- ifelse(census$ST > 900, census$ST / 1000, census$ST)

diag_df <- count(census, ST, name = "count")

## Numeric ST code -> two-letter state abbreviation.
## Codes that are not in the lookup (and NA) end up as "".
st_lookup <- c(
  `1` = "AL", `2` = "AK", `4` = "AZ", `5` = "AR", `6` = "CA",
  `8` = "CO", `9` = "CT", `10` = "DE", `11` = "DC", `12` = "FL",
  `13` = "GA", `15` = "HI", `16` = "ID", `17` = "IL", `18` = "IN",
  `19` = "IA", `20` = "KS", `21` = "KY", `22` = "LA", `23` = "ME",
  `24` = "MD", `25` = "MA", `26` = "MI", `27` = "MN", `28` = "MS",
  `29` = "MO", `30` = "MT", `31` = "NE", `32` = "NV", `33` = "NH",
  `34` = "NJ", `35` = "NM", `36` = "NY", `37` = "NC", `38` = "ND",
  `39` = "OH", `40` = "OK", `41` = "OR", `42` = "PA", `44` = "RI",
  `45` = "SC", `46` = "SD", `47` = "TN", `48` = "TX", `49` = "UT",
  `50` = "VT", `51` = "VA", `53` = "WA", `54` = "WV", `55` = "WI",
  `56` = "WY"
)
st_pos <- match(census$ST, as.numeric(names(st_lookup)))
census$state <- unname(st_lookup[st_pos])
census$state[is.na(st_pos)] <- ""
rm(st_lookup, st_pos)


# Cross-check: every ST code next to the abbreviation it received.
diag_df <- count(census, ST, state, name = "count")


## Merge states into regional groupings
# Four alternative regional groupings of `state`. In each one, a state that is
# not listed (including the "" placeholder) gets "".
census <- census %>%
  mutate(
    # region2: large states kept on their own, the rest pooled
    region2 = case_when(
      state %in% c("CA", "TX", "AZ", "NM", "NV", "CO", "FL") ~ state,
      # All of Northeast, Middle Atlantic
      state %in% c("NY", "NJ", "PA") ~ "NY/NJ/PA",
      # East North Central, Midwest
      state %in% c("IL", "IN", "MI", "OH", "WI") ~ "Midwest_ENC",
      # Pacific, West (ex CA)
      state %in% c("AK", "HI", "OR", "WA") ~ "West_Pac",
      # Confederacy (ex TX)
      state %in% c("AL", "AR", "GA", "KY", "LA",
                   "MS", "NC", "SC", "TN", "VA") ~ "Confederacy",
      # New England, Northeast
      state %in% c("CT", "MA", "ME", "NH", "RI", "VT") ~ "NE_NewEng",
      # Part of South Atlantic
      state %in% c("DC", "DE", "MD", "WV") ~ "East_Atlantic",
      # West North Central (Midwest) + OK (ex TX, which is West South Central, South)
      state %in% c("IA", "KS", "MO", "NE",
                   "OK", "SD", "ND", "MN") ~ "Midwest_WNC",
      # Mountain, West (ex NV, CO, AZ, NM)
      state %in% c("ID", "MT", "UT", "WY") ~ "West_Mount",
      .default = ""
    ),
    # region_pew: four-way split
    region_pew = case_when(
      state %in% c("CA", "WA", "OR", "AK", "HI", "ID", "MT",
                   "WY", "NV", "UT", "CO", "AZ", "NM") ~ "West",
      state %in% c("ND", "SD", "NE", "KS", "MN", "IA",
                   "MO", "WI", "IL", "IN", "MI", "OH") ~ "Midwest",
      state %in% c("TX", "OK", "AR", "LA", "MS", "AL", "TN", "KY", "FL",
                   "GA", "SC", "NC", "VA", "WV", "DC", "DE", "MD") ~ "South",
      state %in% c("NY", "PA", "NJ", "CT", "RI",
                   "MA", "NH", "VT", "ME") ~ "Northeast",
      .default = ""
    ),
    # region_sub: nine-way split
    region_sub = case_when(
      state %in% c("CA", "WA", "OR", "AK", "HI") ~ "Pacific",
      state %in% c("ID", "MT", "WY", "NV",
                   "UT", "CO", "AZ", "NM") ~ "Mountain",
      state %in% c("ND", "SD", "NE", "KS",
                   "MN", "IA", "MO") ~ "W_NorthCentral",
      state %in% c("WI", "IL", "IN", "MI", "OH") ~ "E_NorthCentral",
      state %in% c("TX", "OK", "AR", "LA") ~ "W_SouthCentral",
      state %in% c("MS", "AL", "TN", "KY") ~ "E_SouthCentral",
      state %in% c("FL", "GA", "SC", "NC", "VA",
                   "WV", "DC", "DE", "MD") ~ "S_Atlantic",
      state %in% c("NY", "PA", "NJ") ~ "M_Atlantic",
      state %in% c("CT", "RI", "MA", "NH", "VT", "ME") ~ "NewEngland",
      .default = ""
    ),
    # region_lnps: only the listed states receive a label
    region_lnps = case_when(
      state %in% c("FL") ~ "FLORIDA",
      state %in% c("TX", "AZ", "NM", "CO", "NV") ~ "SOUTHWEST",
      state %in% c("CA", "OR") ~ "WEST COAST",
      state %in% c("NY", "PA", "NJ", "MA") ~ "NORTHEAST",
      state %in% c("IL", "IA", "NE") ~ "MIDWEST",
      .default = ""
    )
  )

# Cross-tab of each state against three of the groupings, for inspection.
diag_df <- count(census, state, region_pew, region_sub, region_lnps,
                 name = "count")

#### USBorn

# diag_df is overwritten at each step; it only exists for interactive checks.
diag_df <- count(census, USborn, name = "count")

#### origin

diag_df <- count(census, origin, name = "count")

## Keep the raw label in origin_old, then fold abbreviations and full names
## into five named groups. Any other value, NA included, becomes "Other".
origin_groups <- c(
  "Mex" = "Mexican", "Mexican" = "Mexican",
  "PR" = "Puerto Rican", "Puerto Rican" = "Puerto Rican",
  "Cub" = "Cuban", "Cuban" = "Cuban",
  "Dom" = "Dominican", "Dominican" = "Dominican",
  "Salv" = "Salvadoran", "Salvadoran" = "Salvadoran"
)

census <- census %>%
  mutate(
    origin_old = origin,
    origin = coalesce(
      unname(origin_groups[as.character(origin_old)]),
      "Other"
    )
  )

rm(origin_groups)

diag_df <- count(census, origin, origin_old, name = "count")


#### education

diag_df <- count(census, education, name = "count")

## CollGrad binary: 1 when education is exactly "CollGrad", otherwise 0
## (a missing education value counts as 0)

census <- census %>%
  mutate(CollGrad = as.numeric(education %in% "CollGrad"))

diag_df <- count(census, education, CollGrad, name = "count")



#### sex

diag_df <- count(census, sex, name = "count")


#### agegroup

diag_df <- count(census, agegroup, name = "count")

## Drop rows with missing or blank agegroup and the 16-17 group

census <- census %>%
  filter(
    !is.na(agegroup),
    !(agegroup %in% c("", "age16-17"))
  )


## Collapse the two oldest groups into a single age75plus group

census <- census %>%
  mutate(
    agegroup = replace(
      agegroup,
      agegroup %in% c("age75-89", "age90plus"),
      "age75plus"
    )
  )

diag_df <- count(census, agegroup, name = "count")

#### Final variables and subsetting

#### Make category groupings
#### Make WeightedN (check against existing)

## Some surveys are by region instead of by state:
## region_lnps for LNPS data
## region for Pew (2006, 2007, 2009, 2013, 2014, 2015)
## region_sub for Pew (2008, 2018)
## region2 for remainder

census <- data.frame(census)

#' Aggregate census rows into post-stratification cells.
#'
#' Sums `WeightedN` within every combination of `group_vars` and adds a
#' `category` label made by pasting `category_vars` together with "-".
#'
#' @param data Data frame holding `WeightedN` and all named columns.
#' @param group_vars Character vector of columns to aggregate over, in
#'   grouping order.
#' @param category_vars Character vector of columns pasted into `category`,
#'   in label order. Must be a subset of `group_vars`.
#' @return A tibble with the grouping columns, `WeightedN` and `category`.
#'   It stays grouped by all of `group_vars` except the last one, which is
#'   what a plain `summarize()` after `group_by()` would leave behind.
build_census_cells <- function(data, group_vars, category_vars) {
  stopifnot(all(category_vars %in% group_vars))

  cells <- data %>%
    group_by(across(all_of(group_vars))) %>%
    # .groups is spelled out to keep the existing grouping without the
    # "summarise() has grouped output" message.
    summarize(WeightedN = sum(WeightedN), .groups = "drop_last")

  # Built outside mutate() so grouping columns can be pasted as well.
  cells$category <- do.call(
    paste,
    c(lapply(category_vars, function(v) cells[[v]]), sep = "-")
  )

  cells
}

# Cells keyed by the LNPS regions.
census_lnps <- build_census_cells(
  census,
  group_vars = c("region_lnps", "USborn", "origin", "sex",
                 "agegroup", "CollGrad", "year"),
  category_vars = c("region_lnps", "USborn", "CollGrad", "sex",
                    "agegroup", "origin")
)

# Cells keyed by the four Pew regions.
census_pewregion <- build_census_cells(
  census,
  group_vars = c("region_pew", "USborn", "origin", "sex",
                 "agegroup", "CollGrad", "year"),
  category_vars = c("region_pew", "USborn", "CollGrad", "sex",
                    "agegroup", "origin")
)

# Cells keyed by the nine sub-regions.
census_pewregion_sub <- build_census_cells(
  census,
  group_vars = c("region_sub", "USborn", "origin", "sex",
                 "agegroup", "CollGrad", "year"),
  category_vars = c("region_sub", "USborn", "CollGrad", "sex",
                    "agegroup", "origin")
)

# Cells keyed by region2 (used for the remaining surveys).
census_region2 <- build_census_cells(
  census,
  group_vars = c("region2", "USborn", "origin", "sex",
                 "agegroup", "CollGrad", "year"),
  category_vars = c("region2", "USborn", "CollGrad", "sex",
                    "agegroup", "origin")
)

# region2 cells without national origin.
census_region2_noorigin <- build_census_cells(
  census,
  group_vars = c("region2", "USborn", "sex",
                 "agegroup", "CollGrad", "year"),
  category_vars = c("region2", "USborn", "CollGrad", "sex", "agegroup")
)

# region2 cells without national origin and without nativity.
# USborn is left out of the grouping as well as the label: grouping by it
# while omitting it from `category` gave several rows per category and year,
# each holding only part of that cell's population.
census_region2_noorigin_nousborn <- build_census_cells(
  census,
  group_vars = c("region2", "sex", "agegroup", "CollGrad", "year"),
  category_vars = c("region2", "CollGrad", "sex", "agegroup")
)


# Cells with no regional component.
census_noregion <- build_census_cells(
  census,
  group_vars = c("USborn", "sex", "agegroup", "CollGrad", "year", "origin"),
  category_vars = c("USborn", "CollGrad", "sex", "agegroup", "origin")
)

#### WeightedN

# Number of rows each category label has in every cell table (one row per
# year it appears in). The existing grouping is dropped first so the tally
# is by category alone.
diag_df <- census_lnps %>%
  ungroup() %>%
  count(category, name = "count")

# Min of 1

diag_df <- census_pewregion %>%
  ungroup() %>%
  count(category, name = "count")

# Min of 1

diag_df <- census_pewregion_sub %>%
  ungroup() %>%
  count(category, name = "count")

# Min of 1

diag_df <- census_region2 %>%
  ungroup() %>%
  count(category, name = "count")

# Min of 1



```


```{r Final clean and prep for Survey data}

# Work on a copy so surveys_all stays as loaded.
survey <- surveys_all

#### agegroup

diag_df <- count(survey, age, name = "count")

# Same age bands as the census agegroup labels; ages under 18, ages that fall
# between bands, and missing ages are left as "" and dropped just below.
survey <- survey %>%
  mutate(
    agegroup = case_when(
      age >= 18 & age <= 24 ~ "age18-24",
      age >= 25 & age <= 29 ~ "age25-29",
      age >= 30 & age <= 44 ~ "age30-44",
      age >= 45 & age <= 59 ~ "age45-59",
      age >= 60 & age <= 74 ~ "age60-74",
      age >= 75 ~ "age75plus",
      .default = ""
    )
  )
survey <- subset(survey, agegroup != "") # Remove NA age


diag_df <- count(survey, age, agegroup, name = "count")


#### education/CollGrad

diag_df <- count(survey, education, name = "count")

## CollGrad binary: 1 when education is exactly "CollGrad", otherwise 0
## (a missing education value counts as 0)

survey <- survey %>%
  mutate(CollGrad = as.numeric(education %in% "CollGrad"))

diag_df <- count(survey, education, CollGrad, name = "count")


## National origin

diag_df <- count(survey, origin, name = "count")

## Keep the five named origin groups; everything else becomes "Other"

survey <- survey %>%
  mutate(
    origin = if_else(
      origin %in% c("Mexican", "Puerto Rican", "Cuban",
                    "Dominican", "Salvadoran"),
      origin,
      "Other"
    )
  )


diag_df <- count(survey, Year, Survey, origin, name = "count")


## Various regions

## State-based grouping based on CCES and other survey groups; states that
## are not listed get "".
survey <- survey %>%
  mutate(
    region2 = case_when(
      state %in% c("CA", "TX", "AZ", "NM", "NV", "CO", "FL") ~ state,
      # All of Northeast, Middle Atlantic
      state %in% c("NY", "NJ", "PA") ~ "NY/NJ/PA",
      # All of East North Central, Midwest
      state %in% c("IL", "IN", "MI", "OH", "WI") ~ "Midwest_ENC",
      # Pacific, West (ex CA)
      state %in% c("AK", "HI", "OR", "WA") ~ "West_Pac",
      # Confederacy (ex TX)
      state %in% c("AL", "AR", "GA", "KY", "LA",
                   "MS", "NC", "SC", "TN", "VA") ~ "Confederacy",
      # All of New England, Northeast
      state %in% c("CT", "MA", "ME", "NH", "RI", "VT") ~ "NE_NewEng",
      # Part of South Atlantic
      state %in% c("DC", "DE", "MD", "WV") ~ "East_Atlantic",
      # West North Central (Midwest) + OK (ex TX, which is West South Central, South)
      state %in% c("IA", "KS", "MO", "NE",
                   "OK", "SD", "ND", "MN") ~ "Midwest_WNC",
      # Mountain, West (ex NV, CO, AZ, NM)
      state %in% c("ID", "MT", "UT", "WY") ~ "West_Mount",
      .default = ""
    )
  )



## Pew region is a list of 4 (for some years); `region` holds either the
## numeric code or the text label, and both are mapped to one label.
survey <- survey %>%
  mutate(
    region_pew = case_when(
      region %in% c("4", "West") ~ "West",
      region %in% c("2", "North Central") ~ "Midwest",
      region %in% c("3", "South") ~ "South",
      region %in% c("1", "Northeast") ~ "Northeast",
      .default = ""
    )
  )

diag_df <- count(survey, state, region, region2, name = "count")


diag_df <- count(survey, Survey, region, region_pew, name = "count")

```


# Older surveys

```{r LNPS 1989}


## 1. Load temp survey file

survey_temp <- survey %>%
  filter(Survey == "LNPS")


## 2. Make "category" variable (different for each survey)


## First, checking region labels on both sides (inspect interactively):

diag_df <- survey_temp %>%
  group_by(region) %>%
  reframe(count = n())

diag_df <- census_lnps %>%
  group_by(region_lnps) %>%
  reframe(count = n())


## Make category variable in survey
## LNPS: the survey's `region` column takes the slot that region_lnps
## occupies in the census category label

survey_temp <- survey_temp %>%
  mutate(category = paste(region,
                          USborn,
                          CollGrad,
                          sex,
                          agegroup,
                          origin,
                          sep = "-"))

## Make "Dem" variable for now to compare

survey_temp$Dem <- 0
survey_temp$Dem[survey_temp$Party == "Democrat"] <- 1


## 3. Load temp census files

## Only 1989; Using 1990 Census.
## Region filter, rename and year filter run as one chain: filtering the year
## from a fresh copy of census_lnps would throw the first two steps away.

census_temp <- census_lnps %>%
  filter(region_lnps %in% c("FLORIDA",
                            "MIDWEST",
                            "NORTHEAST",
                            "SOUTHWEST",
                            "WEST COAST")) %>% # drops blank-region rows
  rename(region = region_lnps) %>%             # same name as the survey column
  filter(year == 1990)

## 4. Checking that categories are shared across; if not, create new

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Census still has many more categories

## First, check to see which ones are not shared:

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Some categories have very large WeightedN but are not present in survey
## We do what we can: restrict Census to just those in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

## Survey categories with no census counterpart; check

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Small populations or NA values: drop those respondents

survey_temp <- survey_temp %>%
  filter(category %in% census_temp$category)


## Make population a smaller dataframe (postStratify needs the stratum
## column plus exactly one count column)

census_temp <- census_temp %>%
  ungroup() %>%
  select(WeightedN,
         category)

## 5. Calculate original survey first

svy.oldweight <- svydesign(ids = ~id, weights = ~Weight, strata = ~category, data = survey_temp)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(design = svy.oldweight, strata = ~category, population = census_temp, partial = TRUE)


## Compare provided and post-stratified weights

lm_df <- tibble(oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
                psweight = attr(svy.ps$postStrata[[1]], "weights")
)

lm1 <- lm(data = lm_df,
          oldweight ~ psweight)

summary(lm1)

## Attach the post-stratified weights under an explicit column name instead
## of renaming a column whose name is the deparsed attr() call.

survey.ps <- as.data.frame(survey_temp)
survey.ps$PSweights <- attr(svy.ps$postStrata[[1]], "weights")

survey.ps_lnps1989 <- survey.ps

## Summary: LNPS may have some pretty weird population dynamics
## Check next one

## Demographic balance: provided weights vs post-stratified weights vs Census.
## The Census row uses only the census year matched to this survey (1990);
## pooling every year would benchmark a 1989 survey against later populations.

census_bench <- subset(census, year == 1990)

demog <- data.frame(Weights = c("provided", "post-strat", "Census"),
                    stringsAsFactors = FALSE)
demog$USBorn <- c(svymean(~USborn, svy.oldweight),
                  svymean(~USborn, svy.ps),
                  sum(subset(census_bench, USborn == 1)$WeightedN) / sum(census_bench$WeightedN))
demog$MeanAge <- c(svymean(~age, svy.oldweight),
                   svymean(~age, svy.ps),
                   NA) # census file only has age groups
demog$CollGrad <- c(svymean(~CollGrad, svy.oldweight),
                    svymean(~CollGrad, svy.ps),
                    sum(subset(census_bench, education == "CollGrad")$WeightedN) / sum(census_bench$WeightedN))
demog$Dem <- c(svymean(~Dem, svy.oldweight),
               svymean(~Dem, svy.ps),
               NA) # no party variable in the census file


print(demog)

names(survey.ps_lnps1989)

```



```{r Kaiser 1999}


## 1. Load temp survey file

survey_temp <- survey %>%
  filter(Survey == "Kaiser")


## 2. Make "category" variable (different for each survey)


## First, checking region:

diag_df <- survey_temp %>%
  group_by(region2) %>%
  reframe(count = n())

## Category label uses region2, in the same order as census_region2

survey_temp <- survey_temp %>%
  mutate(category = paste(region2,
                          USborn,
                          CollGrad,
                          sex,
                          agegroup,
                          origin,
                          sep = "-"))

## Make "Dem" variable for now to compare

survey_temp$Dem <- 0
survey_temp$Dem[survey_temp$Party == "Democrat"] <- 1


## 3. Load temp census files

## Only 1999; Using 2000 Census

census_temp <- census_region2 %>%
  filter(year == 2000)

## 4. Checking that categories are shared across; if not, create new

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Census still has many more categories

## First, check to see which ones are not shared:

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Some categories have very large WeightedN but are not present in survey
## We do what we can: restrict Census to just those in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Survey categories with no census counterpart; check

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Small populations or NA values: drop those respondents

survey_temp <- survey_temp %>%
  filter(category %in% census_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## Make population a smaller dataframe (postStratify needs the stratum
## column plus exactly one count column)

census_temp <- census_temp %>%
  ungroup() %>%
  select(WeightedN,
         category)

## 5. Calculate original survey first

svy.oldweight <- svydesign(ids = ~id, weights = ~Weight, strata = ~category, data = survey_temp)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(design = svy.oldweight, strata = ~category, population = census_temp, partial = TRUE)


## Compare provided and post-stratified weights

lm_df <- tibble(oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
                psweight = attr(svy.ps$postStrata[[1]], "weights")
)

lm1 <- lm(data = lm_df,
          oldweight ~ psweight)

summary(lm1)


## Attach the post-stratified weights under an explicit column name instead
## of renaming a column whose name is the deparsed attr() call.

survey.ps <- as.data.frame(survey_temp)
survey.ps$PSweights <- attr(svy.ps$postStrata[[1]], "weights")

survey.ps_kaiser1999 <- survey.ps

## Checking demographic balance: provided weights vs post-stratified weights
## vs Census. The Census row uses only the census year matched to this survey
## (2000) rather than every year pooled together.

census_bench <- subset(census, year == 2000)

demog <- data.frame(Weights = c("provided", "post-strat", "Census"),
                    stringsAsFactors = FALSE)
demog$USBorn <- c(svymean(~USborn, svy.oldweight),
                  svymean(~USborn, svy.ps),
                  sum(subset(census_bench, USborn == 1)$WeightedN) / sum(census_bench$WeightedN))
demog$MeanAge <- c(svymean(~age, svy.oldweight),
                   svymean(~age, svy.ps),
                   NA) # census file only has age groups
demog$CollGrad <- c(svymean(~CollGrad, svy.oldweight),
                    svymean(~CollGrad, svy.ps),
                    sum(subset(census_bench, education == "CollGrad")$WeightedN) / sum(census_bench$WeightedN))
demog$Dem <- c(svymean(~Dem, svy.oldweight),
               svymean(~Dem, svy.ps),
               NA) # no party variable in the census file


print(demog)

names(survey.ps_kaiser1999)


```




```{r Kaiser-Pew 2002}

## 1. Load temp survey file

survey_temp <- survey %>%
  filter(Survey == "Kaiser-Pew")


## 2. Make "category" variable (different for each survey)


## First, checking region:

diag_df <- survey_temp %>%
  group_by(region2) %>%
  reframe(count = n())

## Category label uses region2, in the same order as census_region2

survey_temp <- survey_temp %>%
  mutate(category = paste(region2,
                          USborn,
                          CollGrad,
                          sex,
                          agegroup,
                          origin,
                          sep = "-"))

## Make "Dem" variable for now to compare

survey_temp$Dem <- 0
survey_temp$Dem[survey_temp$Party == "Democrat"] <- 1



## 3. Load temp census files

## Using 2002 Census

census_temp <- census_region2 %>%
  filter(year == 2002)


## Verify/check Origin in both Survey and Census

diag_df1 <- survey_temp %>%
  group_by(origin) %>%
  reframe(count = n())

diag_df2 <- census_temp %>%
  group_by(origin) %>%
  reframe(count = n())


## 4. Checking that categories are shared across; if not, create new

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Census still has many more categories

## First, check to see which ones are not shared:

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Some categories have very large WeightedN but are not present in survey
## We do what we can: restrict Census to just those in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)



categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Survey categories with no census counterpart; check

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Create new categories in census and assign minimum WeightedN.
## category_check has one row per respondent, so distinct() keeps a single
## population row per missing category instead of one per respondent.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe (postStratify needs the stratum
## column plus exactly one count column)

census_temp <- census_temp %>%
  ungroup() %>%
  select(category,
         WeightedN)

census_temp <- rbind(census_temp,
                     category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first

svy.oldweight <- svydesign(ids = ~id, weights = ~Weight, strata = ~category, data = survey_temp, nest = TRUE)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(design = svy.oldweight, strata = ~category, population = census_temp, partial = TRUE)


## Compare provided and post-stratified weights

lm_df <- tibble(oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
                psweight = attr(svy.ps$postStrata[[1]], "weights")
)

lm1 <- lm(data = lm_df,
          oldweight ~ psweight)

summary(lm1)


## Attach the post-stratified weights under an explicit column name instead
## of renaming a column whose name is the deparsed attr() call.

survey.ps_kaiserpew2002 <- as.data.frame(survey_temp)
survey.ps_kaiserpew2002$PSweights <- attr(svy.ps$postStrata[[1]], "weights")


## Demographic balance: provided weights vs post-stratified weights vs Census.
## The Census row uses only the census year matched to this survey (2002)
## rather than every year pooled together.

census_bench <- subset(census, year == 2002)

demog <- data.frame(Weights = c("provided", "post-strat", "Census"),
                    stringsAsFactors = FALSE)
demog$USBorn <- c(svymean(~USborn, svy.oldweight),
                  svymean(~USborn, svy.ps),
                  sum(subset(census_bench, USborn == 1)$WeightedN) / sum(census_bench$WeightedN))
demog$MeanAge <- c(svymean(~age, svy.oldweight),
                   svymean(~age, svy.ps),
                   NA) # census file only has age groups
demog$CollGrad <- c(svymean(~CollGrad, svy.oldweight),
                    svymean(~CollGrad, svy.ps),
                    sum(subset(census_bench, education == "CollGrad")$WeightedN) / sum(census_bench$WeightedN))
demog$Dem <- c(svymean(~Dem, svy.oldweight),
               svymean(~Dem, svy.ps),
               NA) # no party variable in the census file


print(demog)

names(survey.ps_kaiserpew2002)


```


```{r Pew 2004}

## 1. Load temp survey file

survey_temp <- survey %>%
  filter(Survey == "Pew") %>%
  filter(Year == 2004)


## 2. Make "category" variable (different for each survey)


## Check variables
## USBorn

diag_df <- survey_temp %>%
  group_by(USborn) %>%
  reframe(count=n())


survey_temp <- survey_temp %>%
  filter(is.na(USborn)==FALSE)


## First, checking region:

diag_df <- survey_temp %>%
  group_by(region2) %>%
  reframe(count=n())

## Make region (region2 I believe)

## Re-code origin so Dom/Salv are in Other

survey_temp <- survey_temp %>%
  mutate(origin = case_when(
    origin %in% c("Mexican", "Puerto Rican", "Cuban") ~ origin,
    TRUE ~ "Other")
  )

survey_temp <- survey_temp %>%
  mutate(category = paste(region2,
                          USborn,
                          CollGrad,
                          sex,
                          agegroup,
                          origin,
                          sep = "-"))

## Make "Dem" variable for now to compare

survey_temp$Dem <- 0
survey_temp$Dem[survey_temp$Party == "Democrat"] <- 1


## 3. Load temp census files

## Using 2004 Census

census_temp <- census_region2 %>%
  filter(year == 2004)

## Verify/check Origin in both Survey and Census

diag_df1 <- survey_temp %>%
  group_by(origin) %>%
  reframe(count=n())

diag_df2 <- census_temp %>%
  group_by(origin) %>%
  reframe(count=n())

## 4. Checking that categories are shared across; if not, create new

## categories1 / categories2 list the distinct category labels found in the
## census and in the survey respectively

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Census still has many more categories

## First, check to see which ones are not shared:

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Some categories have very large WeightedN but are not present in survey
## We do what we can: restrict Census to just those in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

## Stop here if no census cell matches a survey cell: min() further down would
## return Inf (with only a warning) and the padded cells would get an
## infinite population total.
stopifnot(nrow(census_temp) > 0)

categories1 <- tibble(unique(census_temp$category))

## 12 are not shared from survey; check

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Create new categories in census and assign minimum WeightedN.
## distinct() keeps a single row per missing category, so the population
## table holds one total per cell rather than one row per unmatched respondent.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first
## Design object on the weights provided with the survey (`Weight`), with
## cluster ids taken from `id` and the `category` cells used as strata.

svy.oldweight <- svydesign(ids=~id, weights=~Weight, strata = ~category, data=survey_temp)

## 6. Estimate post-stratified weights
## Post-stratify on `category` against the WeightedN totals in census_temp.
## partial = TRUE lets population cells that have no respondents be ignored.

svy.ps <- postStratify(design = svy.oldweight, strata = ~category, population = census_temp, partial = TRUE)


## Old and new weights, read from the attributes stored on the first
## postStrata element of the post-stratified design.
lm_df <- tibble(oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
                psweight = attr(svy.ps$postStrata[[1]], "weights")
)

## Diagnostic regression of the provided weights on the post-stratified ones
lm1 <- lm(data = lm_df,
          oldweight ~ psweight)

summary(lm1)


## Attach the post-stratified weights to the respondent rows. cbind() names the
## unnamed column after the text of the expression, so the backticked name in
## rename() below must match that call character for character.
survey.ps_pew2004 <- cbind(survey_temp, attr(svy.ps$postStrata[[1]], "weights"))

survey.ps_pew2004 <- survey.ps_pew2004 %>%
  rename(PSweights = `attr(svy.ps$postStrata[[1]], "weights")`)


## Comparison table with one row per weighting: provided, post-stratified, Census.
## NOTE(review): the Census row is computed from `census`, which is not subset
## to 2004 anywhere in the visible part of this chunk -- confirm it is the intended benchmark.
## MeanAge and Dem are given no Census value; their third row is set to NA.
demog <- as.data.frame(c("provided","post-strat","Census"), stringsAsFactors=FALSE)
names(demog) <- "Weights"
demog$USBorn <- c(svymean(~USborn, svy.oldweight), svymean(~USborn, svy.ps), sum(subset(census, USborn == 1)$WeightedN)/sum(census$WeightedN))
demog$MeanAge[1] <- svymean(~age, svy.oldweight)
demog$MeanAge[2] <- svymean(~age, svy.ps)
demog$MeanAge[3] <- NA
demog$CollGrad <- c(svymean(~CollGrad, svy.oldweight), svymean(~CollGrad, svy.ps), sum(subset(census, education == "CollGrad")$WeightedN)/sum(census$WeightedN))
demog$Dem[1] <- svymean(~Dem, svy.oldweight)
demog$Dem[2] <- svymean(~Dem, svy.ps)
demog$Dem[3] <- NA


print(demog)

## Column names of the exported frame, including the new PSweights column
names(survey.ps_pew2004)

```


# 2006

```{r 2006 and beyond Note}


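# Using region_pew: Pew 2006, 2007, 2009, 2013, 2014, 2015
# Using region_sub: Pew 2008, Pew 2018
# Remainder: use region2
# NOTE(review): the Pew 2008, Pew 2010 and Pew 2011 chunks below build their
# category from region_pew (2008 and 2010 pair it with census_pewregion),
# which does not match this list -- confirm which one is correct.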


# LNS 2006: has origin and state/region
# Pew 2006: does not have state/region
# CCES 2006: does not have national origin

```


```{r LNS 2006}

## 1. Load temp survey file
## Keep only the 2006 LNS respondents.

survey_temp <- survey %>%
  filter(Year == 2006, Survey == "LNS")


## 2. Make "category" variable (different for each survey)


## Check variables: every tabulation below overwrites diag_df.
## USBorn (respondents with missing USborn are dropped right after)

diag_df <- survey_temp %>%
  group_by(USborn) %>%
  summarise(count = n(), .groups = "drop")

survey_temp <- survey_temp %>%
  filter(!is.na(USborn))


## CollGrad

diag_df <- survey_temp %>%
  group_by(CollGrad) %>%
  summarise(count = n(), .groups = "drop")

## sex

diag_df <- survey_temp %>%
  group_by(sex) %>%
  summarise(count = n(), .groups = "drop")

## agegroup

diag_df <- survey_temp %>%
  group_by(agegroup) %>%
  summarise(count = n(), .groups = "drop")

## origin (by survey)

diag_df <- survey_temp %>%
  group_by(origin, Survey) %>%
  summarise(count = n(), .groups = "drop")

## Re-code origin so Dom/Salv are in Other:
## anything that is not Mexican / Puerto Rican / Cuban becomes "Other"

survey_temp <- survey_temp %>%
  mutate(origin = if_else(origin %in% c("Mexican", "Puerto Rican", "Cuban"),
                          origin,
                          "Other"))

## origin again, after the collapse

diag_df1 <- survey_temp %>%
  group_by(origin) %>%
  summarise(count = n(), .groups = "drop")


## First, checking region:

diag_df <- survey_temp %>%
  group_by(region2) %>%
  summarise(count = n(), .groups = "drop")


## Make category, and a 0/1 "Dem" variable for now to compare
## (Dem is 1 only where Party is exactly "Democrat")

survey_temp <- survey_temp %>%
  mutate(category = paste(region2, USborn, CollGrad, sex, agegroup, origin,
                          sep = "-"),
         Dem = as.numeric(Party %in% "Democrat"))


## 3. Load temp census files

## Using 2006 Census

census_temp <- census_region2 %>%
  filter(year == 2006)

## 4. Checking that categories are shared across; if not, create new

## categories1 / categories2 list the distinct category labels found in the
## census and in the survey respectively

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Census still has many more categories

## First, check to see which ones are not shared:

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Some categories have very large WeightedN but are not present in survey
## We do what we can: restrict Census to just those in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

## Stop here if no census cell matches a survey cell: min() further down would
## return Inf (with only a warning) and the padded cells would get an
## infinite population total.
stopifnot(nrow(census_temp) > 0)

categories1 <- tibble(unique(census_temp$category))

## 46 are not shared from survey; check

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Create new categories in census and assign minimum WeightedN.
## distinct() keeps a single row per missing category, so the population
## table holds one total per cell rather than one row per unmatched respondent.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first
## Design object on the weights provided with the survey (`Weight`), with
## cluster ids taken from `id` and the `category` cells used as strata.

svy.oldweight <- svydesign(ids=~id, weights=~Weight, strata = ~category, data=survey_temp)

## 6. Estimate post-stratified weights
## Post-stratify on `category` against the WeightedN totals in census_temp.
## partial = TRUE lets population cells that have no respondents be ignored.

svy.ps <- postStratify(design = svy.oldweight, strata = ~category, population = census_temp, partial = TRUE)


## Old and new weights, read from the attributes stored on the first
## postStrata element of the post-stratified design.
lm_df <- tibble(oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
                psweight = attr(svy.ps$postStrata[[1]], "weights")
)

## Diagnostic regression of the provided weights on the post-stratified ones
lm1 <- lm(data = lm_df,
          oldweight ~ psweight)

summary(lm1)


## Attach the post-stratified weights to the respondent rows. cbind() names the
## unnamed column after the text of the expression, so the backticked name in
## rename() below must match that call character for character.
survey.ps <- cbind(survey_temp, attr(svy.ps$postStrata[[1]], "weights"))

survey.ps_lns2006 <- survey.ps %>%
  rename(PSweights = `attr(svy.ps$postStrata[[1]], "weights")`)


## Comparison table with one row per weighting: provided, post-stratified, Census.
## NOTE(review): the Census row is computed from `census`, which is not subset
## to 2006 anywhere in this chunk -- confirm it is the intended benchmark.
## MeanAge and Dem are given no Census value; their third row is set to NA.
demog <- as.data.frame(c("provided","post-strat","Census"), stringsAsFactors=FALSE)
names(demog) <- "Weights"
demog$USBorn <- c(svymean(~USborn, svy.oldweight), svymean(~USborn, svy.ps), sum(subset(census, USborn == 1)$WeightedN)/sum(census$WeightedN))
demog$MeanAge[1] <- svymean(~age, svy.oldweight)
demog$MeanAge[2] <- svymean(~age, svy.ps)
demog$MeanAge[3] <- NA
demog$CollGrad <- c(svymean(~CollGrad, svy.oldweight), svymean(~CollGrad, svy.ps), sum(subset(census, education == "CollGrad")$WeightedN)/sum(census$WeightedN))
demog$Dem[1] <- svymean(~Dem, svy.oldweight)
demog$Dem[2] <- svymean(~Dem, svy.ps)
demog$Dem[3] <- NA


print(demog)

## Column names of the exported frame, including the new PSweights column
names(survey.ps_lns2006)


```



```{r Pew 2006}

## State/region is under "region_pew"
## 1. Load temp survey file
## Keep only the 2006 Pew respondents.

survey_temp <- survey %>%
  filter(Year == 2006, Survey == "Pew")


## Region distribution before any rows are dropped
diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  summarise(count = n(), .groups = "drop")


## 2. Make "category" variable (different for each survey)


## Check variables: every tabulation below overwrites diag_df.
## USBorn (respondents with missing USborn are dropped right after)

diag_df <- survey_temp %>%
  group_by(USborn) %>%
  summarise(count = n(), .groups = "drop")

survey_temp <- survey_temp %>%
  filter(!is.na(USborn))

## CollGrad

diag_df <- survey_temp %>%
  group_by(CollGrad) %>%
  summarise(count = n(), .groups = "drop")

## sex

diag_df <- survey_temp %>%
  group_by(sex) %>%
  summarise(count = n(), .groups = "drop")

## agegroup

diag_df <- survey_temp %>%
  group_by(agegroup) %>%
  summarise(count = n(), .groups = "drop")

## origin (by survey)

diag_df <- survey_temp %>%
  group_by(origin, Survey) %>%
  summarise(count = n(), .groups = "drop")

## Re-code origin so Dom/Salv are in Other:
## anything that is not Mexican / Puerto Rican / Cuban becomes "Other"

survey_temp <- survey_temp %>%
  mutate(origin = if_else(origin %in% c("Mexican", "Puerto Rican", "Cuban"),
                          origin,
                          "Other"))


## First, checking region (again, now without the missing-USborn rows):

diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  summarise(count = n(), .groups = "drop")


## Make category, and a 0/1 "Dem" variable for now to compare
## (Dem is 1 only where Party is exactly "Democrat")

survey_temp <- survey_temp %>%
  mutate(category = paste(region_pew, USborn, CollGrad, sex, agegroup, origin,
                          sep = "-"),
         Dem = as.numeric(Party %in% "Democrat"))


## 3. Load temp census files

## Using 2006 Census

census_temp <- census_pewregion %>%
  filter(year == 2006)

## 4. Checking that categories are shared across; if not, create new

## categories1 / categories2 list the distinct category labels found in the
## census and in the survey respectively

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Census still has many more categories

## First, check to see which ones are not shared:

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Some categories have very large WeightedN but are not present in survey
## We do what we can: restrict Census to just those in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

## Stop here if no census cell matches a survey cell: min() further down would
## return Inf (with only a warning) and the padded cells would get an
## infinite population total.
stopifnot(nrow(census_temp) > 0)

categories1 <- tibble(unique(census_temp$category))

## All shared

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Create new categories in census and assign minimum WeightedN.
## distinct() keeps a single row per missing category, so the population
## table holds one total per cell rather than one row per unmatched respondent.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first
## Design object on the weights provided with the survey (`Weight`), with
## cluster ids taken from `id` and the `category` cells used as strata.

svy.oldweight <- svydesign(ids=~id, weights=~Weight, strata = ~category, data=survey_temp)

## 6. Estimate post-stratified weights
## Post-stratify on `category` against the WeightedN totals in census_temp.
## partial = TRUE lets population cells that have no respondents be ignored.

svy.ps <- postStratify(design = svy.oldweight, strata = ~category, population = census_temp, partial = TRUE)


## Old and new weights, read from the attributes stored on the first
## postStrata element of the post-stratified design.
lm_df <- tibble(oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
                psweight = attr(svy.ps$postStrata[[1]], "weights")
)

## Diagnostic regression of the provided weights on the post-stratified ones
lm1 <- lm(data = lm_df,
          oldweight ~ psweight)

summary(lm1)


## Attach the post-stratified weights to the respondent rows. cbind() names the
## unnamed column after the text of the expression, so the backticked name in
## rename() below must match that call character for character.
survey.ps <- cbind(survey_temp, attr(svy.ps$postStrata[[1]], "weights"))

survey.ps_pew2006 <- survey.ps %>%
  rename(PSweights = `attr(svy.ps$postStrata[[1]], "weights")`)


## Comparison table with one row per weighting: provided, post-stratified, Census.
## NOTE(review): the Census row is computed from `census`, which is not subset
## to 2006 anywhere in this chunk -- confirm it is the intended benchmark.
## MeanAge and Dem are given no Census value; their third row is set to NA.
demog <- as.data.frame(c("provided","post-strat","Census"), stringsAsFactors=FALSE)
names(demog) <- "Weights"
demog$USBorn <- c(svymean(~USborn, svy.oldweight), svymean(~USborn, svy.ps), sum(subset(census, USborn == 1)$WeightedN)/sum(census$WeightedN))
demog$MeanAge[1] <- svymean(~age, svy.oldweight)
demog$MeanAge[2] <- svymean(~age, svy.ps)
demog$MeanAge[3] <- NA
demog$CollGrad <- c(svymean(~CollGrad, svy.oldweight), svymean(~CollGrad, svy.ps), sum(subset(census, education == "CollGrad")$WeightedN)/sum(census$WeightedN))
demog$Dem[1] <- svymean(~Dem, svy.oldweight)
demog$Dem[2] <- svymean(~Dem, svy.ps)
demog$Dem[3] <- NA


print(demog)

## Column names of the exported frame, including the new PSweights column
names(survey.ps_pew2006)


```



```{r CCES 2006}


## State/region is under "region2"
## 1. Load temp survey file
## Keep only the 2006 rows whose Survey value is "CES".
## NOTE(review): the chunk is labelled CCES but the filter value is "CES" --
## confirm this is the label used in the data.

survey_temp <- survey %>%
  filter(Year == 2006, Survey == "CES")


## Region distribution before any rows are dropped
diag_df <- survey_temp %>%
  group_by(region2) %>%
  summarise(count = n(), .groups = "drop")


## 2. Make "category" variable (different for each survey)


## Check variables: every tabulation below overwrites diag_df.
## USBorn (respondents with missing USborn are dropped right after)

diag_df <- survey_temp %>%
  group_by(USborn) %>%
  summarise(count = n(), .groups = "drop")

survey_temp <- survey_temp %>%
  filter(!is.na(USborn))

## CollGrad

diag_df <- survey_temp %>%
  group_by(CollGrad) %>%
  summarise(count = n(), .groups = "drop")

## sex

diag_df <- survey_temp %>%
  group_by(sex) %>%
  summarise(count = n(), .groups = "drop")

## agegroup

diag_df <- survey_temp %>%
  group_by(agegroup) %>%
  summarise(count = n(), .groups = "drop")

## origin (by survey)

diag_df <- survey_temp %>%
  group_by(origin, Survey) %>%
  summarise(count = n(), .groups = "drop")

## No origin

## First, checking region (again, now without the missing-USborn rows):

diag_df <- survey_temp %>%
  group_by(region2) %>%
  summarise(count = n(), .groups = "drop")


## Make category (origin is left out of the label for this survey), and a
## 0/1 "Dem" variable for now to compare (1 only where Party is "Democrat")

survey_temp <- survey_temp %>%
  mutate(category = paste(region2, USborn, CollGrad, sex, agegroup,
                          sep = "-"),
         Dem = as.numeric(Party %in% "Democrat"))


## 3. Load temp census files

## Using 2006 Census region2, no origin

census_temp <- census_region2_noorigin %>%
  filter(year == 2006)

## 4. Checking that categories are shared across; if not, create new

## categories1 / categories2 list the distinct category labels found in the
## census and in the survey respectively

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Census still has many more categories

## First, check to see which ones are not shared:

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Some categories have very large WeightedN but are not present in survey
## We do what we can: restrict Census to just those in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

## Stop here if no census cell matches a survey cell: min() further down would
## return Inf (with only a warning) and the padded cells would get an
## infinite population total.
stopifnot(nrow(census_temp) > 0)

categories1 <- tibble(unique(census_temp$category))

## Survey rows whose category has no census counterpart

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Create new categories in census and assign minimum WeightedN.
## distinct() keeps a single row per missing category, so the population
## table holds one total per cell rather than one row per unmatched respondent.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first
## Design object on the weights provided with the survey (`Weight`), with
## cluster ids taken from `id` and the `category` cells used as strata.

svy.oldweight <- svydesign(ids=~id, weights=~Weight, strata = ~category, data=survey_temp)

## 6. Estimate post-stratified weights
## Post-stratify on `category` against the WeightedN totals in census_temp.
## partial = TRUE lets population cells that have no respondents be ignored.

svy.ps <- postStratify(design = svy.oldweight, strata = ~category, population = census_temp, partial = TRUE)


## Old and new weights, read from the attributes stored on the first
## postStrata element of the post-stratified design.
lm_df <- tibble(oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
                psweight = attr(svy.ps$postStrata[[1]], "weights")
)

## Diagnostic regression of the provided weights on the post-stratified ones
lm1 <- lm(data = lm_df,
          oldweight ~ psweight)

summary(lm1)


## Attach the post-stratified weights to the respondent rows. cbind() names the
## unnamed column after the text of the expression, so the backticked name in
## rename() below must match that call character for character.
survey.ps <- cbind(survey_temp, attr(svy.ps$postStrata[[1]], "weights"))

survey.ps_cces2006 <- survey.ps %>%
  rename(PSweights = `attr(svy.ps$postStrata[[1]], "weights")`)


## Comparison table with one row per weighting: provided, post-stratified, Census.
## NOTE(review): the Census row is computed from `census`, which is not subset
## to 2006 anywhere in this chunk -- confirm it is the intended benchmark.
## MeanAge and Dem are given no Census value; their third row is set to NA.
demog <- as.data.frame(c("provided","post-strat","Census"), stringsAsFactors=FALSE)
names(demog) <- "Weights"
demog$USBorn <- c(svymean(~USborn, svy.oldweight), svymean(~USborn, svy.ps), sum(subset(census, USborn == 1)$WeightedN)/sum(census$WeightedN))
demog$MeanAge[1] <- svymean(~age, svy.oldweight)
demog$MeanAge[2] <- svymean(~age, svy.ps)
demog$MeanAge[3] <- NA
demog$CollGrad <- c(svymean(~CollGrad, svy.oldweight), svymean(~CollGrad, svy.ps), sum(subset(census, education == "CollGrad")$WeightedN)/sum(census$WeightedN))
demog$Dem[1] <- svymean(~Dem, svy.oldweight)
demog$Dem[2] <- svymean(~Dem, svy.ps)
demog$Dem[3] <- NA


print(demog)



```


# Pew 2007-2018

```{r Pew 2007}

## State/region is under "region_pew"
## 1. Load temp survey file
## Keep only the 2007 Pew respondents.

survey_temp <- survey %>%
  filter(Year == 2007, Survey == "Pew")


## Region distribution before any rows are dropped
diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  summarise(count = n(), .groups = "drop")


## 2. Make "category" variable (different for each survey)


## Check variables: every tabulation below overwrites diag_df.
## USBorn (respondents with missing USborn are dropped right after)

diag_df <- survey_temp %>%
  group_by(USborn) %>%
  summarise(count = n(), .groups = "drop")

survey_temp <- survey_temp %>%
  filter(!is.na(USborn))

## CollGrad

diag_df <- survey_temp %>%
  group_by(CollGrad) %>%
  summarise(count = n(), .groups = "drop")

## sex

diag_df <- survey_temp %>%
  group_by(sex) %>%
  summarise(count = n(), .groups = "drop")

## agegroup

diag_df <- survey_temp %>%
  group_by(agegroup) %>%
  summarise(count = n(), .groups = "drop")

## origin (by survey)

diag_df <- survey_temp %>%
  group_by(origin, Survey) %>%
  summarise(count = n(), .groups = "drop")

## Re-code origin so Dom/Salv are in Other:
## anything that is not Mexican / Puerto Rican / Cuban becomes "Other"

survey_temp <- survey_temp %>%
  mutate(origin = if_else(origin %in% c("Mexican", "Puerto Rican", "Cuban"),
                          origin,
                          "Other"))


## First, checking region (again, now without the missing-USborn rows):

diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  summarise(count = n(), .groups = "drop")


## Make category, and a 0/1 "Dem" variable for now to compare
## (Dem is 1 only where Party is exactly "Democrat")

survey_temp <- survey_temp %>%
  mutate(category = paste(region_pew, USborn, CollGrad, sex, agegroup, origin,
                          sep = "-"),
         Dem = as.numeric(Party %in% "Democrat"))


## 3. Load temp census files

## Using 2007 Census

census_temp <- census_pewregion %>%
  filter(year == 2007)

## 4. Checking that categories are shared across; if not, create new

## categories1 / categories2 list the distinct category labels found in the
## census and in the survey respectively

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Census still has many more categories

## First, check to see which ones are not shared:

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Some categories have very large WeightedN but are not present in survey
## We do what we can: restrict Census to just those in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

## Stop here if no census cell matches a survey cell: min() further down would
## return Inf (with only a warning) and the padded cells would get an
## infinite population total.
stopifnot(nrow(census_temp) > 0)

categories1 <- tibble(unique(census_temp$category))

## All shared

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Create new categories in census and assign minimum WeightedN.
## distinct() keeps a single row per missing category, so the population
## table holds one total per cell rather than one row per unmatched respondent.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first
## Design object on the weights provided with the survey (`Weight`), with
## cluster ids taken from `id` and the `category` cells used as strata.

svy.oldweight <- svydesign(ids=~id, weights=~Weight, strata = ~category, data=survey_temp)

## 6. Estimate post-stratified weights
## Post-stratify on `category` against the WeightedN totals in census_temp.
## partial = TRUE lets population cells that have no respondents be ignored.

svy.ps <- postStratify(design = svy.oldweight, strata = ~category, population = census_temp, partial = TRUE)


## Old and new weights, read from the attributes stored on the first
## postStrata element of the post-stratified design.
lm_df <- tibble(oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
                psweight = attr(svy.ps$postStrata[[1]], "weights")
)

## Diagnostic regression of the provided weights on the post-stratified ones
lm1 <- lm(data = lm_df,
          oldweight ~ psweight)

summary(lm1)


## Attach the post-stratified weights to the respondent rows. cbind() names the
## unnamed column after the text of the expression, so the backticked name in
## rename() below must match that call character for character.
survey.ps <- cbind(survey_temp, attr(svy.ps$postStrata[[1]], "weights"))

survey.ps_pew2007 <- survey.ps %>%
  rename(PSweights = `attr(svy.ps$postStrata[[1]], "weights")`)


## Comparison table with one row per weighting: provided, post-stratified, Census.
## NOTE(review): the Census row is computed from `census`, which is not subset
## to 2007 anywhere in this chunk -- confirm it is the intended benchmark.
## MeanAge and Dem are given no Census value; their third row is set to NA.
demog <- as.data.frame(c("provided","post-strat","Census"), stringsAsFactors=FALSE)
names(demog) <- "Weights"
demog$USBorn <- c(svymean(~USborn, svy.oldweight), svymean(~USborn, svy.ps), sum(subset(census, USborn == 1)$WeightedN)/sum(census$WeightedN))
demog$MeanAge[1] <- svymean(~age, svy.oldweight)
demog$MeanAge[2] <- svymean(~age, svy.ps)
demog$MeanAge[3] <- NA
demog$CollGrad <- c(svymean(~CollGrad, svy.oldweight), svymean(~CollGrad, svy.ps), sum(subset(census, education == "CollGrad")$WeightedN)/sum(census$WeightedN))
demog$Dem[1] <- svymean(~Dem, svy.oldweight)
demog$Dem[2] <- svymean(~Dem, svy.ps)
demog$Dem[3] <- NA


print(demog)


```


```{r Pew 2008}

## State/region is under "region_pew"
## NOTE(review): the "2006 and beyond Note" chunk lists Pew 2008 under
## region_sub, while this chunk uses region_pew throughout -- confirm.
## 1. Load temp survey file
## Keep only the 2008 Pew respondents.

survey_temp <- survey %>%
  filter(Year == 2008, Survey == "Pew")


## Region distribution before any rows are dropped
diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  summarise(count = n(), .groups = "drop")


## 2. Make "category" variable (different for each survey)


## Check variables: every tabulation below overwrites diag_df.
## USBorn (respondents with missing USborn are dropped right after)

diag_df <- survey_temp %>%
  group_by(USborn) %>%
  summarise(count = n(), .groups = "drop")

survey_temp <- survey_temp %>%
  filter(!is.na(USborn))

## CollGrad

diag_df <- survey_temp %>%
  group_by(CollGrad) %>%
  summarise(count = n(), .groups = "drop")

## sex

diag_df <- survey_temp %>%
  group_by(sex) %>%
  summarise(count = n(), .groups = "drop")

## agegroup

diag_df <- survey_temp %>%
  group_by(agegroup) %>%
  summarise(count = n(), .groups = "drop")

## origin (by survey)

diag_df <- survey_temp %>%
  group_by(origin, Survey) %>%
  summarise(count = n(), .groups = "drop")

## Re-code origin so Dom/Salv are in Other:
## anything that is not Mexican / Puerto Rican / Cuban becomes "Other"

survey_temp <- survey_temp %>%
  mutate(origin = if_else(origin %in% c("Mexican", "Puerto Rican", "Cuban"),
                          origin,
                          "Other"))


## First, checking region (again, now without the missing-USborn rows):

diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  summarise(count = n(), .groups = "drop")


## Make category, and a 0/1 "Dem" variable for now to compare
## (Dem is 1 only where Party is exactly "Democrat")

survey_temp <- survey_temp %>%
  mutate(category = paste(region_pew, USborn, CollGrad, sex, agegroup, origin,
                          sep = "-"),
         Dem = as.numeric(Party %in% "Democrat"))


## 3. Load temp census files

## Using 2008 Census

census_temp <- census_pewregion %>%
  filter(year == 2008)

## 4. Checking that categories are shared across; if not, create new

## categories1 / categories2 list the distinct category labels found in the
## census and in the survey respectively

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Census still has many more categories

## First, check to see which ones are not shared:

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Some categories have very large WeightedN but are not present in survey
## We do what we can: restrict Census to just those in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

## Stop here if no census cell matches a survey cell: min() further down would
## return Inf (with only a warning) and the padded cells would get an
## infinite population total.
stopifnot(nrow(census_temp) > 0)

categories1 <- tibble(unique(census_temp$category))

## All shared

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Create new categories in census and assign minimum WeightedN.
## distinct() keeps a single row per missing category, so the population
## table holds one total per cell rather than one row per unmatched respondent.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first
## Design object on the weights provided with the survey (`Weight`), with
## cluster ids taken from `id` and the `category` cells used as strata.

svy.oldweight <- svydesign(ids=~id, weights=~Weight, strata = ~category, data=survey_temp)

## 6. Estimate post-stratified weights
## Post-stratify on `category` against the WeightedN totals in census_temp.
## partial = TRUE lets population cells that have no respondents be ignored.

svy.ps <- postStratify(design = svy.oldweight, strata = ~category, population = census_temp, partial = TRUE)


## Old and new weights, read from the attributes stored on the first
## postStrata element of the post-stratified design.
lm_df <- tibble(oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
                psweight = attr(svy.ps$postStrata[[1]], "weights")
)

## Diagnostic regression of the provided weights on the post-stratified ones
lm1 <- lm(data = lm_df,
          oldweight ~ psweight)

summary(lm1)


## Attach the post-stratified weights to the respondent rows. cbind() names the
## unnamed column after the text of the expression, so the backticked name in
## rename() below must match that call character for character.
survey.ps <- cbind(survey_temp, attr(svy.ps$postStrata[[1]], "weights"))

survey.ps_pew2008 <- survey.ps %>%
  rename(PSweights = `attr(svy.ps$postStrata[[1]], "weights")`)


## Comparison table with one row per weighting: provided, post-stratified, Census.
## NOTE(review): the Census row is computed from `census`, which is not subset
## to 2008 anywhere in this chunk -- confirm it is the intended benchmark.
## MeanAge and Dem are given no Census value; their third row is set to NA.
demog <- as.data.frame(c("provided","post-strat","Census"), stringsAsFactors=FALSE)
names(demog) <- "Weights"
demog$USBorn <- c(svymean(~USborn, svy.oldweight), svymean(~USborn, svy.ps), sum(subset(census, USborn == 1)$WeightedN)/sum(census$WeightedN))
demog$MeanAge[1] <- svymean(~age, svy.oldweight)
demog$MeanAge[2] <- svymean(~age, svy.ps)
demog$MeanAge[3] <- NA
demog$CollGrad <- c(svymean(~CollGrad, svy.oldweight), svymean(~CollGrad, svy.ps), sum(subset(census, education == "CollGrad")$WeightedN)/sum(census$WeightedN))
demog$Dem[1] <- svymean(~Dem, svy.oldweight)
demog$Dem[2] <- svymean(~Dem, svy.ps)
demog$Dem[3] <- NA


print(demog)


```


```{r Pew 2009}

## State/region is under "region_pew"
## 1. Load temp survey file
## Keep only the 2009 Pew respondents.

survey_temp <- survey %>%
  filter(Year == 2009, Survey == "Pew")


## Region distribution before any rows are dropped
diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  summarise(count = n(), .groups = "drop")


## 2. Make "category" variable (different for each survey)


## Check variables: every tabulation below overwrites diag_df.
## USBorn (respondents with missing USborn are dropped right after)

diag_df <- survey_temp %>%
  group_by(USborn) %>%
  summarise(count = n(), .groups = "drop")

survey_temp <- survey_temp %>%
  filter(!is.na(USborn))

## CollGrad

diag_df <- survey_temp %>%
  group_by(CollGrad) %>%
  summarise(count = n(), .groups = "drop")

## sex

diag_df <- survey_temp %>%
  group_by(sex) %>%
  summarise(count = n(), .groups = "drop")

## agegroup

diag_df <- survey_temp %>%
  group_by(agegroup) %>%
  summarise(count = n(), .groups = "drop")

## origin (by survey)

diag_df <- survey_temp %>%
  group_by(origin, Survey) %>%
  summarise(count = n(), .groups = "drop")

## Re-code origin so Dom/Salv are in Other:
## anything that is not Mexican / Puerto Rican / Cuban becomes "Other"

survey_temp <- survey_temp %>%
  mutate(origin = if_else(origin %in% c("Mexican", "Puerto Rican", "Cuban"),
                          origin,
                          "Other"))


## First, checking region (again, now without the missing-USborn rows):

diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  summarise(count = n(), .groups = "drop")


## Make category, and a 0/1 "Dem" variable for now to compare
## (Dem is 1 only where Party is exactly "Democrat")

survey_temp <- survey_temp %>%
  mutate(category = paste(region_pew, USborn, CollGrad, sex, agegroup, origin,
                          sep = "-"),
         Dem = as.numeric(Party %in% "Democrat"))


## 3. Load temp census files

## Using 2009 Census

census_temp <- census_pewregion %>%
  filter(year == 2009)

## 4. Checking that categories are shared across; if not, create new

## categories1 / categories2 list the distinct category labels found in the
## census and in the survey respectively

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Census still has many more categories

## First, check to see which ones are not shared:

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Some categories have very large WeightedN but are not present in survey
## We do what we can: restrict Census to just those in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

## Stop here if no census cell matches a survey cell: min() further down would
## return Inf (with only a warning) and the padded cells would get an
## infinite population total.
stopifnot(nrow(census_temp) > 0)

categories1 <- tibble(unique(census_temp$category))

## All shared

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Create new categories in census and assign minimum WeightedN.
## distinct() keeps a single row per missing category, so the population
## table holds one total per cell rather than one row per unmatched respondent.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first
## Design object on the weights provided with the survey (`Weight`), with
## cluster ids taken from `id` and the `category` cells used as strata.

svy.oldweight <- svydesign(ids=~id, weights=~Weight, strata = ~category, data=survey_temp)

## 6. Estimate post-stratified weights
## Post-stratify on `category` against the WeightedN totals in census_temp.
## partial = TRUE lets population cells that have no respondents be ignored.

svy.ps <- postStratify(design = svy.oldweight, strata = ~category, population = census_temp, partial = TRUE)


## Old and new weights, read from the attributes stored on the first
## postStrata element of the post-stratified design.
lm_df <- tibble(oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
                psweight = attr(svy.ps$postStrata[[1]], "weights")
)

## Diagnostic regression of the provided weights on the post-stratified ones
lm1 <- lm(data = lm_df,
          oldweight ~ psweight)

summary(lm1)


## Attach the post-stratified weights to the respondent rows. cbind() names the
## unnamed column after the text of the expression, so the backticked name in
## rename() below must match that call character for character.
survey.ps <- cbind(survey_temp, attr(svy.ps$postStrata[[1]], "weights"))

survey.ps_pew2009 <- survey.ps %>%
  rename(PSweights = `attr(svy.ps$postStrata[[1]], "weights")`)


## Comparison table with one row per weighting: provided, post-stratified, Census.
## NOTE(review): the Census row is computed from `census`, which is not subset
## to 2009 anywhere in this chunk -- confirm it is the intended benchmark.
## MeanAge and Dem are given no Census value; their third row is set to NA.
demog <- as.data.frame(c("provided","post-strat","Census"), stringsAsFactors=FALSE)
names(demog) <- "Weights"
demog$USBorn <- c(svymean(~USborn, svy.oldweight), svymean(~USborn, svy.ps), sum(subset(census, USborn == 1)$WeightedN)/sum(census$WeightedN))
demog$MeanAge[1] <- svymean(~age, svy.oldweight)
demog$MeanAge[2] <- svymean(~age, svy.ps)
demog$MeanAge[3] <- NA
demog$CollGrad <- c(svymean(~CollGrad, svy.oldweight), svymean(~CollGrad, svy.ps), sum(subset(census, education == "CollGrad")$WeightedN)/sum(census$WeightedN))
demog$Dem[1] <- svymean(~Dem, svy.oldweight)
demog$Dem[2] <- svymean(~Dem, svy.ps)
demog$Dem[3] <- NA


print(demog)


```


```{r Pew 2010}


## State/region is under "region_pew"
## NOTE(review): the "2006 and beyond Note" chunk does not list Pew 2010 under
## region_pew (it would fall under "Remainder: use region2") -- confirm.
## 1. Load temp survey file

survey_temp <- survey %>%
  filter(Year == 2010 &
         Survey %in% c("Pew"))


diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  reframe(count=n())


## 2. Make "category" variable (different for each survey)


## Check variables
## USBorn

diag_df <- survey_temp %>%
  group_by(USborn) %>%
  reframe(count=n())

## Drop respondents whose USborn is missing
survey_temp <- survey_temp %>%
  filter(is.na(USborn)==FALSE)

## CollGrad

diag_df <- survey_temp %>%
  group_by(CollGrad) %>%
  reframe(count=n())

## sex

diag_df <- survey_temp %>%
  group_by(sex) %>%
  reframe(count=n())

## agegroup

diag_df <- survey_temp %>%
  group_by(agegroup) %>%
  reframe(count=n())

## origin

diag_df <- survey_temp %>%
  group_by(origin, Survey) %>%
  reframe(count=n())

## Re-code origin so Dom/Salv are in Other, the same collapse the other Pew
## chunks apply before building a category that includes origin. Without it,
## any origin outside these four groups yields a category label that the
## collapsed labels cannot match.
## NOTE(review): if origin in this wave is already limited to Mexican /
## Puerto Rican / Cuban / Other this is a no-op -- check the table above.

survey_temp <- survey_temp %>%
  mutate(origin = case_when(
    origin %in% c("Mexican", "Puerto Rican", "Cuban") ~ origin,
    TRUE ~ "Other")
  )


## First, checking region:

diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  reframe(count=n())


## Make category

survey_temp <- survey_temp %>%
  mutate(category = paste(region_pew,
                          USborn,
                          CollGrad,
                          sex,
                          agegroup,
                          origin,
                          sep = "-"))

## Make "Dem" variable for now to compare
## (1 only where Party is exactly "Democrat", 0 otherwise)

survey_temp$Dem <- 0
survey_temp$Dem[survey_temp$Party == "Democrat"] <- 1


## 3. Load temp census files

## Using 2010 Census

census_temp <- census_pewregion %>%
  filter(year == 2010)

## 4. Checking that categories are shared across; if not, create new

## categories1 / categories2 list the distinct category labels found in the
## census and in the survey respectively

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Census still has many more categories

## First, check to see which ones are not shared:

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Some categories have very large WeightedN but are not present in survey
## We do what we can: restrict Census to just those in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

## Stop here if no census cell matches a survey cell: min() further down would
## return Inf (with only a warning) and the padded cells would get an
## infinite population total.
stopifnot(nrow(census_temp) > 0)

categories1 <- tibble(unique(census_temp$category))

## All shared

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Create new categories in census and assign minimum WeightedN.
## distinct() keeps a single row per missing category, so the population
## table holds one total per cell rather than one row per unmatched respondent.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first
## Design object on the weights provided with the survey (`Weight`), with
## cluster ids taken from `id` and the `category` cells used as strata.

svy.oldweight <- svydesign(ids=~id, weights=~Weight, strata = ~category, data=survey_temp)

## 6. Estimate post-stratified weights
## Post-stratify on `category` against the WeightedN totals in census_temp.
## partial = TRUE lets population cells that have no respondents be ignored.

svy.ps <- postStratify(design = svy.oldweight, strata = ~category, population = census_temp, partial = TRUE)


## Old and new weights, read from the attributes stored on the first
## postStrata element of the post-stratified design.
lm_df <- tibble(oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
                psweight = attr(svy.ps$postStrata[[1]], "weights")
)

## Diagnostic regression of the provided weights on the post-stratified ones
lm1 <- lm(data = lm_df,
          oldweight ~ psweight)

summary(lm1)

## Attach the post-stratified weights to the respondent rows. cbind() names the
## unnamed column after the text of the expression, so the backticked name in
## rename() below must match that call character for character.
survey.ps <- cbind(survey_temp, attr(svy.ps$postStrata[[1]], "weights"))
survey.ps_pew2010 <- survey.ps %>%
  rename(PSweights = `attr(svy.ps$postStrata[[1]], "weights")`)


## Comparison table with one row per weighting: provided, post-stratified, Census.
## NOTE(review): the Census row is computed from `census`, which is not subset
## to 2010 anywhere in this chunk -- confirm it is the intended benchmark.
## MeanAge and Dem are given no Census value; their third row is set to NA.
demog <- as.data.frame(c("provided","post-strat","Census"), stringsAsFactors=FALSE)
names(demog) <- "Weights"
demog$USBorn <- c(svymean(~USborn, svy.oldweight), svymean(~USborn, svy.ps), sum(subset(census, USborn == 1)$WeightedN)/sum(census$WeightedN))
demog$MeanAge[1] <- svymean(~age, svy.oldweight)
demog$MeanAge[2] <- svymean(~age, svy.ps)
demog$MeanAge[3] <- NA
demog$CollGrad <- c(svymean(~CollGrad, svy.oldweight), svymean(~CollGrad, svy.ps), sum(subset(census, education == "CollGrad")$WeightedN)/sum(census$WeightedN))
demog$Dem[1] <- svymean(~Dem, svy.oldweight)
demog$Dem[2] <- svymean(~Dem, svy.ps)
demog$Dem[3] <- NA

print(demog)


```

```{r Pew 2011}


## Pew 2011 -- geography for this survey is stored in "region_pew"
## 1. Subset the pooled survey file to this survey-year

survey_temp <- filter(survey, Year == 2011, Survey %in% "Pew")

## diag_df is a scratch table that each tabulation below overwrites;
## inspect it interactively after running the step of interest

diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  summarise(count = n(), .groups = "drop")


## 2. Build the "category" post-stratification cell (differs by survey)

## Tabulate each component variable before combining them
## USborn

diag_df <- survey_temp %>%
  group_by(USborn) %>%
  summarise(count = n(), .groups = "drop")

## Respondents with missing USborn are dropped
survey_temp <- filter(survey_temp, !is.na(USborn))

## CollGrad

diag_df <- survey_temp %>%
  group_by(CollGrad) %>%
  summarise(count = n(), .groups = "drop")

## sex

diag_df <- survey_temp %>%
  group_by(sex) %>%
  summarise(count = n(), .groups = "drop")

## agegroup

diag_df <- survey_temp %>%
  group_by(agegroup) %>%
  summarise(count = n(), .groups = "drop")

## origin (by survey)

diag_df <- survey_temp %>%
  group_by(origin, Survey) %>%
  summarise(count = n(), .groups = "drop")

## region, re-tabulated after the USborn filter

diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  summarise(count = n(), .groups = "drop")


## category: one string per respondent joining the cell variables with "-".
## Dem: temporary 0/1 indicator (1 when Party is "Democrat", 0 otherwise,
## including missing Party) used in the weight comparison later on.

survey_temp <- survey_temp %>%
  mutate(
    category = paste(
      region_pew, USborn, CollGrad, sex, agegroup, origin,
      sep = "-"
    ),
    Dem = as.numeric(Party %in% "Democrat")
  )


## 3. Census benchmark: census_pewregion cell counts for 2011

census_temp <- census_pewregion %>%
  filter(year == 2011)

## 4. Reconcile post-stratification cells between census and survey.
## categories1 (census) and categories2 (survey) hold the distinct cells on
## each side; they are refreshed after every step for interactive inspection.

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Original note: census still has many more categories.
## Census cells that have no respondents in the survey (inspection only).
## Original note: some of these have very large WeightedN.

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Such cells cannot be used, so keep only census cells present in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Survey respondents whose cell is missing from the census.
## Original note: all shared (not re-verified here).

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Add each missing cell to the census with the smallest WeightedN found among
## the retained census cells. category_check holds one row per respondent, so
## distinct() is used to add every missing cell exactly once instead of once
## per respondent.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Reduce the census to the two columns needed as a population table

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Survey design under the weights provided with the survey
## (respondent id as the sampling unit, category cells as strata)

svy.oldweight <- svydesign(
  ids = ~id,
  strata = ~category,
  weights = ~Weight,
  data = survey_temp
)

## 6. Post-stratify the design to the census cell counts in census_temp

svy.ps <- postStratify(
  design = svy.oldweight,
  strata = ~category,
  population = census_temp,
  partial = TRUE
)

## Weights before and after post-stratification, read from the attributes of
## the post-stratum index stored on the design

ps_weights <- attr(svy.ps$postStrata[[1]], "weights")

lm_df <- tibble(
  oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
  psweight = ps_weights
)

## Quick check of how the provided weights relate to the new ones

lm1 <- lm(oldweight ~ psweight, data = lm_df)

summary(lm1)

## Append the post-stratified weights as "PSweights". The column is named in
## the cbind() call itself, so nothing depends on the name R would otherwise
## generate from the unnamed expression.

survey.ps <- cbind(survey_temp, PSweights = ps_weights)
survey.ps_pew2011 <- survey.ps

## Demographics under provided weights, post-stratified weights, and census.
## MeanAge and Dem have no census value here and are left NA.
## NOTE(review): the census row is computed from `census`, not from the
## 2011-specific census_temp -- confirm `census` is the intended benchmark.

demog <- data.frame(
  Weights = c("provided", "post-strat", "Census"),
  USBorn = c(
    as.numeric(svymean(~USborn, svy.oldweight)),
    as.numeric(svymean(~USborn, svy.ps)),
    sum(subset(census, USborn == 1)$WeightedN) / sum(census$WeightedN)
  ),
  MeanAge = c(
    as.numeric(svymean(~age, svy.oldweight)),
    as.numeric(svymean(~age, svy.ps)),
    NA
  ),
  CollGrad = c(
    as.numeric(svymean(~CollGrad, svy.oldweight)),
    as.numeric(svymean(~CollGrad, svy.ps)),
    sum(subset(census, education == "CollGrad")$WeightedN) /
      sum(census$WeightedN)
  ),
  Dem = c(
    as.numeric(svymean(~Dem, svy.oldweight)),
    as.numeric(svymean(~Dem, svy.ps)),
    NA
  ),
  stringsAsFactors = FALSE
)

print(demog)


```

```{r Pew 2012}


## Pew 2012 -- geography for this survey is stored in "region_pew"
## 1. Subset the pooled survey file to this survey-year

survey_temp <- filter(survey, Year == 2012, Survey %in% "Pew")

## diag_df is a scratch table that each tabulation below overwrites;
## inspect it interactively after running the step of interest

diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  summarise(count = n(), .groups = "drop")


## 2. Build the "category" post-stratification cell (differs by survey)

## Tabulate each component variable before combining them
## USborn

diag_df <- survey_temp %>%
  group_by(USborn) %>%
  summarise(count = n(), .groups = "drop")

## Respondents with missing USborn are dropped
survey_temp <- filter(survey_temp, !is.na(USborn))

## CollGrad

diag_df <- survey_temp %>%
  group_by(CollGrad) %>%
  summarise(count = n(), .groups = "drop")

## sex

diag_df <- survey_temp %>%
  group_by(sex) %>%
  summarise(count = n(), .groups = "drop")

## agegroup

diag_df <- survey_temp %>%
  group_by(agegroup) %>%
  summarise(count = n(), .groups = "drop")

## origin (by survey)

diag_df <- survey_temp %>%
  group_by(origin, Survey) %>%
  summarise(count = n(), .groups = "drop")

## region, re-tabulated after the USborn filter

diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  summarise(count = n(), .groups = "drop")


## category: one string per respondent joining the cell variables with "-".
## Dem: temporary 0/1 indicator (1 when Party is "Democrat", 0 otherwise,
## including missing Party) used in the weight comparison later on.

survey_temp <- survey_temp %>%
  mutate(
    category = paste(
      region_pew, USborn, CollGrad, sex, agegroup, origin,
      sep = "-"
    ),
    Dem = as.numeric(Party %in% "Democrat")
  )


## 3. Census benchmark: census_pewregion cell counts for 2012

census_temp <- census_pewregion %>%
  filter(year == 2012)

## 4. Reconcile post-stratification cells between census and survey.
## categories1 (census) and categories2 (survey) hold the distinct cells on
## each side; they are refreshed after every step for interactive inspection.

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Original note: census still has many more categories.
## Census cells that have no respondents in the survey (inspection only).
## Original note: some of these have very large WeightedN.

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Such cells cannot be used, so keep only census cells present in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Survey respondents whose cell is missing from the census.
## Original note: all shared (not re-verified here).

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Add each missing cell to the census with the smallest WeightedN found among
## the retained census cells. category_check holds one row per respondent, so
## distinct() is used to add every missing cell exactly once instead of once
## per respondent.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Reduce the census to the two columns needed as a population table

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Survey design under the weights provided with the survey
## (respondent id as the sampling unit, category cells as strata)

svy.oldweight <- svydesign(
  ids = ~id,
  strata = ~category,
  weights = ~Weight,
  data = survey_temp
)

## 6. Post-stratify the design to the census cell counts in census_temp

svy.ps <- postStratify(
  design = svy.oldweight,
  strata = ~category,
  population = census_temp,
  partial = TRUE
)

## Weights before and after post-stratification, read from the attributes of
## the post-stratum index stored on the design

ps_weights <- attr(svy.ps$postStrata[[1]], "weights")

lm_df <- tibble(
  oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
  psweight = ps_weights
)

## Quick check of how the provided weights relate to the new ones

lm1 <- lm(oldweight ~ psweight, data = lm_df)

summary(lm1)

## Append the post-stratified weights as "PSweights". The column is named in
## the cbind() call itself, so nothing depends on the name R would otherwise
## generate from the unnamed expression.

survey.ps <- cbind(survey_temp, PSweights = ps_weights)
survey.ps_pew2012 <- survey.ps

## Demographics under provided weights, post-stratified weights, and census.
## MeanAge and Dem have no census value here and are left NA.
## NOTE(review): the census row is computed from `census`, not from the
## 2012-specific census_temp -- confirm `census` is the intended benchmark.

demog <- data.frame(
  Weights = c("provided", "post-strat", "Census"),
  USBorn = c(
    as.numeric(svymean(~USborn, svy.oldweight)),
    as.numeric(svymean(~USborn, svy.ps)),
    sum(subset(census, USborn == 1)$WeightedN) / sum(census$WeightedN)
  ),
  MeanAge = c(
    as.numeric(svymean(~age, svy.oldweight)),
    as.numeric(svymean(~age, svy.ps)),
    NA
  ),
  CollGrad = c(
    as.numeric(svymean(~CollGrad, svy.oldweight)),
    as.numeric(svymean(~CollGrad, svy.ps)),
    sum(subset(census, education == "CollGrad")$WeightedN) /
      sum(census$WeightedN)
  ),
  Dem = c(
    as.numeric(svymean(~Dem, svy.oldweight)),
    as.numeric(svymean(~Dem, svy.ps)),
    NA
  ),
  stringsAsFactors = FALSE
)

print(demog)


```


```{r Pew 2013}


## Pew 2013 -- three region variables are tabulated below ("region_pew",
## "region2", "region"); the category for this year is built from "region"
## 1. Subset the pooled survey file to this survey-year

survey_temp <- filter(survey, Year == 2013, Survey %in% "Pew")

## List the available columns
names(survey_temp)

## diag_df is a scratch table that each tabulation below overwrites;
## inspect it interactively after running the step of interest

diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  summarise(count = n(), .groups = "drop")


## 2. Build the "category" post-stratification cell (differs by survey)

## Tabulate each component variable before combining them
## USborn

diag_df <- survey_temp %>%
  group_by(USborn) %>%
  summarise(count = n(), .groups = "drop")

## Respondents with missing USborn are dropped
survey_temp <- filter(survey_temp, !is.na(USborn))

## CollGrad

diag_df <- survey_temp %>%
  group_by(CollGrad) %>%
  summarise(count = n(), .groups = "drop")

## sex

diag_df <- survey_temp %>%
  group_by(sex) %>%
  summarise(count = n(), .groups = "drop")

## agegroup

diag_df <- survey_temp %>%
  group_by(agegroup) %>%
  summarise(count = n(), .groups = "drop")

## origin (by survey)

diag_df <- survey_temp %>%
  group_by(origin, Survey) %>%
  summarise(count = n(), .groups = "drop")

## Cross-tabulate the three region variables after the USborn filter

diag_df <- survey_temp %>%
  group_by(region_pew, region2, region) %>%
  summarise(count = n(), .groups = "drop")


## category: one string per respondent joining the cell variables with "-".
## NOTE(review): this year uses `region` where the other Pew years use
## `region_pew`, while the census cells come from census_pewregion -- confirm
## that `region` carries the matching labels for 2013.
## Dem: temporary 0/1 indicator (1 when Party is "Democrat", 0 otherwise,
## including missing Party) used in the weight comparison later on.

survey_temp <- survey_temp %>%
  mutate(
    category = paste(
      region, USborn, CollGrad, sex, agegroup, origin,
      sep = "-"
    ),
    Dem = as.numeric(Party %in% "Democrat")
  )


## 3. Census benchmark: census_pewregion cell counts for 2013

census_temp <- census_pewregion %>%
  filter(year == 2013)

## 4. Reconcile post-stratification cells between census and survey.
## categories1 (census) and categories2 (survey) hold the distinct cells on
## each side; they are refreshed after every step for interactive inspection.

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Original note: census still has many more categories.
## Census cells that have no respondents in the survey (inspection only).
## Original note: some of these have very large WeightedN.

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Such cells cannot be used, so keep only census cells present in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Survey respondents whose cell is missing from the census.
## Original note: 55 not shared (not re-verified here).

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Add each missing cell to the census with the smallest WeightedN found among
## the retained census cells. category_check holds one row per respondent, so
## distinct() is used to add every missing cell exactly once instead of once
## per respondent.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Reduce the census to the two columns needed as a population table

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Survey design under the weights provided with the survey
## (respondent id as the sampling unit, category cells as strata)

svy.oldweight <- svydesign(
  ids = ~id,
  strata = ~category,
  weights = ~Weight,
  data = survey_temp
)

## 6. Post-stratify the design to the census cell counts in census_temp

svy.ps <- postStratify(
  design = svy.oldweight,
  strata = ~category,
  population = census_temp,
  partial = TRUE
)

## Weights before and after post-stratification, read from the attributes of
## the post-stratum index stored on the design

ps_weights <- attr(svy.ps$postStrata[[1]], "weights")

lm_df <- tibble(
  oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
  psweight = ps_weights
)

## Quick check of how the provided weights relate to the new ones

lm1 <- lm(oldweight ~ psweight, data = lm_df)

summary(lm1)

## Append the post-stratified weights as "PSweights". The column is named in
## the cbind() call itself, so nothing depends on the name R would otherwise
## generate from the unnamed expression.

survey.ps <- cbind(survey_temp, PSweights = ps_weights)
survey.ps_pew2013 <- survey.ps

## Demographics under provided weights, post-stratified weights, and census.
## MeanAge and Dem have no census value here and are left NA.
## NOTE(review): the census row is computed from `census`, not from the
## 2013-specific census_temp -- confirm `census` is the intended benchmark.

demog <- data.frame(
  Weights = c("provided", "post-strat", "Census"),
  USBorn = c(
    as.numeric(svymean(~USborn, svy.oldweight)),
    as.numeric(svymean(~USborn, svy.ps)),
    sum(subset(census, USborn == 1)$WeightedN) / sum(census$WeightedN)
  ),
  MeanAge = c(
    as.numeric(svymean(~age, svy.oldweight)),
    as.numeric(svymean(~age, svy.ps)),
    NA
  ),
  CollGrad = c(
    as.numeric(svymean(~CollGrad, svy.oldweight)),
    as.numeric(svymean(~CollGrad, svy.ps)),
    sum(subset(census, education == "CollGrad")$WeightedN) /
      sum(census$WeightedN)
  ),
  Dem = c(
    as.numeric(svymean(~Dem, svy.oldweight)),
    as.numeric(svymean(~Dem, svy.ps)),
    NA
  ),
  stringsAsFactors = FALSE
)

print(demog)


```



```{r Pew 2014}


## Pew 2014 -- geography for this survey is stored in "region_pew"
## 1. Subset the pooled survey file to this survey-year

survey_temp <- filter(survey, Year == 2014, Survey %in% "Pew")

## diag_df is a scratch table that each tabulation below overwrites;
## inspect it interactively after running the step of interest.
## First: cross-tabulate the three region variables.

diag_df <- survey_temp %>%
  group_by(region_pew, region2, region) %>%
  summarise(count = n(), .groups = "drop")


## 2. Build the "category" post-stratification cell (differs by survey)

## Tabulate each component variable before combining them
## USborn

diag_df <- survey_temp %>%
  group_by(USborn) %>%
  summarise(count = n(), .groups = "drop")

## Respondents with missing USborn are dropped
survey_temp <- filter(survey_temp, !is.na(USborn))

## CollGrad

diag_df <- survey_temp %>%
  group_by(CollGrad) %>%
  summarise(count = n(), .groups = "drop")

## sex

diag_df <- survey_temp %>%
  group_by(sex) %>%
  summarise(count = n(), .groups = "drop")

## agegroup

diag_df <- survey_temp %>%
  group_by(agegroup) %>%
  summarise(count = n(), .groups = "drop")

## origin (by survey)

diag_df <- survey_temp %>%
  group_by(origin, Survey) %>%
  summarise(count = n(), .groups = "drop")

## region, tabulated after the USborn filter

diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  summarise(count = n(), .groups = "drop")


## category: one string per respondent joining the cell variables with "-".
## Dem: temporary 0/1 indicator (1 when Party is "Democrat", 0 otherwise,
## including missing Party) used in the weight comparison later on.

survey_temp <- survey_temp %>%
  mutate(
    category = paste(
      region_pew, USborn, CollGrad, sex, agegroup, origin,
      sep = "-"
    ),
    Dem = as.numeric(Party %in% "Democrat")
  )


## 3. Census benchmark: census_pewregion cell counts for 2014

census_temp <- census_pewregion %>%
  filter(year == 2014)

## 4. Reconcile post-stratification cells between census and survey.
## categories1 (census) and categories2 (survey) hold the distinct cells on
## each side; they are refreshed after every step for interactive inspection.

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Original note: census still has many more categories.
## Census cells that have no respondents in the survey (inspection only).
## Original note: some of these have very large WeightedN.

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Such cells cannot be used, so keep only census cells present in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Survey respondents whose cell is missing from the census.
## Original note: all shared (not re-verified here).

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Add each missing cell to the census with the smallest WeightedN found among
## the retained census cells. category_check holds one row per respondent, so
## distinct() is used to add every missing cell exactly once instead of once
## per respondent.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Reduce the census to the two columns needed as a population table

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Survey design under the weights provided with the survey
## (respondent id as the sampling unit, category cells as strata)

svy.oldweight <- svydesign(
  ids = ~id,
  strata = ~category,
  weights = ~Weight,
  data = survey_temp
)

## 6. Post-stratify the design to the census cell counts in census_temp

svy.ps <- postStratify(
  design = svy.oldweight,
  strata = ~category,
  population = census_temp,
  partial = TRUE
)

## Weights before and after post-stratification, read from the attributes of
## the post-stratum index stored on the design

ps_weights <- attr(svy.ps$postStrata[[1]], "weights")

lm_df <- tibble(
  oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
  psweight = ps_weights
)

## Quick check of how the provided weights relate to the new ones

lm1 <- lm(oldweight ~ psweight, data = lm_df)

summary(lm1)

## Append the post-stratified weights as "PSweights". The column is named in
## the cbind() call itself, so nothing depends on the name R would otherwise
## generate from the unnamed expression.

survey.ps <- cbind(survey_temp, PSweights = ps_weights)
survey.ps_pew2014 <- survey.ps

## Demographics under provided weights, post-stratified weights, and census.
## MeanAge and Dem have no census value here and are left NA.
## NOTE(review): the census row is computed from `census`, not from the
## 2014-specific census_temp -- confirm `census` is the intended benchmark.

demog <- data.frame(
  Weights = c("provided", "post-strat", "Census"),
  USBorn = c(
    as.numeric(svymean(~USborn, svy.oldweight)),
    as.numeric(svymean(~USborn, svy.ps)),
    sum(subset(census, USborn == 1)$WeightedN) / sum(census$WeightedN)
  ),
  MeanAge = c(
    as.numeric(svymean(~age, svy.oldweight)),
    as.numeric(svymean(~age, svy.ps)),
    NA
  ),
  CollGrad = c(
    as.numeric(svymean(~CollGrad, svy.oldweight)),
    as.numeric(svymean(~CollGrad, svy.ps)),
    sum(subset(census, education == "CollGrad")$WeightedN) /
      sum(census$WeightedN)
  ),
  Dem = c(
    as.numeric(svymean(~Dem, svy.oldweight)),
    as.numeric(svymean(~Dem, svy.ps)),
    NA
  ),
  stringsAsFactors = FALSE
)

print(demog)



```

```{r Pew 2015}


## Pew 2015 -- geography for this survey is stored in "region_pew"
## 1. Subset the pooled survey file to this survey-year

survey_temp <- filter(survey, Year == 2015, Survey %in% "Pew")

## diag_df is a scratch table that each tabulation below overwrites;
## inspect it interactively after running the step of interest

diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  summarise(count = n(), .groups = "drop")


## 2. Build the "category" post-stratification cell (differs by survey)

## Tabulate each component variable before combining them
## USborn

diag_df <- survey_temp %>%
  group_by(USborn) %>%
  summarise(count = n(), .groups = "drop")

## Respondents with missing USborn are dropped
survey_temp <- filter(survey_temp, !is.na(USborn))

## CollGrad

diag_df <- survey_temp %>%
  group_by(CollGrad) %>%
  summarise(count = n(), .groups = "drop")

## sex

diag_df <- survey_temp %>%
  group_by(sex) %>%
  summarise(count = n(), .groups = "drop")

## agegroup

diag_df <- survey_temp %>%
  group_by(agegroup) %>%
  summarise(count = n(), .groups = "drop")

## origin (by survey)

diag_df <- survey_temp %>%
  group_by(origin, Survey) %>%
  summarise(count = n(), .groups = "drop")

## region, re-tabulated after the USborn filter

diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  summarise(count = n(), .groups = "drop")


## category: one string per respondent joining the cell variables with "-".
## Dem: temporary 0/1 indicator (1 when Party is "Democrat", 0 otherwise,
## including missing Party) used in the weight comparison later on.

survey_temp <- survey_temp %>%
  mutate(
    category = paste(
      region_pew, USborn, CollGrad, sex, agegroup, origin,
      sep = "-"
    ),
    Dem = as.numeric(Party %in% "Democrat")
  )


## 3. Census benchmark: census_pewregion cell counts for 2015

census_temp <- census_pewregion %>%
  filter(year == 2015)

## 4. Reconcile post-stratification cells between census and survey.
## categories1 (census) and categories2 (survey) hold the distinct cells on
## each side; they are refreshed after every step for interactive inspection.

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Original note: census still has many more categories.
## Census cells that have no respondents in the survey (inspection only).
## Original note: some of these have very large WeightedN.

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Such cells cannot be used, so keep only census cells present in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Survey respondents whose cell is missing from the census.
## Original note: all shared (not re-verified here).

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Add each missing cell to the census with the smallest WeightedN found among
## the retained census cells. category_check holds one row per respondent, so
## distinct() is used to add every missing cell exactly once instead of once
## per respondent.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Reduce the census to the two columns needed as a population table

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Survey design under the weights provided with the survey
## (respondent id as the sampling unit, category cells as strata)

svy.oldweight <- svydesign(
  ids = ~id,
  strata = ~category,
  weights = ~Weight,
  data = survey_temp
)

## 6. Post-stratify the design to the census cell counts in census_temp

svy.ps <- postStratify(
  design = svy.oldweight,
  strata = ~category,
  population = census_temp,
  partial = TRUE
)

## Weights before and after post-stratification, read from the attributes of
## the post-stratum index stored on the design

ps_weights <- attr(svy.ps$postStrata[[1]], "weights")

lm_df <- tibble(
  oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
  psweight = ps_weights
)

## Quick check of how the provided weights relate to the new ones

lm1 <- lm(oldweight ~ psweight, data = lm_df)

summary(lm1)

## Append the post-stratified weights as "PSweights". The column is named in
## the cbind() call itself, so nothing depends on the name R would otherwise
## generate from the unnamed expression.

survey.ps <- cbind(survey_temp, PSweights = ps_weights)
survey.ps_pew2015 <- survey.ps

## Demographics under provided weights, post-stratified weights, and census.
## MeanAge and Dem have no census value here and are left NA.
## NOTE(review): the census row is computed from `census`, not from the
## 2015-specific census_temp -- confirm `census` is the intended benchmark.

demog <- data.frame(
  Weights = c("provided", "post-strat", "Census"),
  USBorn = c(
    as.numeric(svymean(~USborn, svy.oldweight)),
    as.numeric(svymean(~USborn, svy.ps)),
    sum(subset(census, USborn == 1)$WeightedN) / sum(census$WeightedN)
  ),
  MeanAge = c(
    as.numeric(svymean(~age, svy.oldweight)),
    as.numeric(svymean(~age, svy.ps)),
    NA
  ),
  CollGrad = c(
    as.numeric(svymean(~CollGrad, svy.oldweight)),
    as.numeric(svymean(~CollGrad, svy.ps)),
    sum(subset(census, education == "CollGrad")$WeightedN) /
      sum(census$WeightedN)
  ),
  Dem = c(
    as.numeric(svymean(~Dem, svy.oldweight)),
    as.numeric(svymean(~Dem, svy.ps)),
    NA
  ),
  stringsAsFactors = FALSE
)

print(demog)


```

```{r Pew 2016}


## Pew 2016 -- the original notes question whether this survey has region
## data ("Not region_pew", "No region data?"); region is left out of the
## category built below
## 1. Subset the pooled survey file to this survey-year

survey_temp <- filter(survey, Year == 2016, Survey %in% "Pew")

## diag_df is a scratch table that each tabulation below overwrites;
## inspect it interactively after running the step of interest.
## First: what region_pew contains for this survey.

diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  summarise(count = n(), .groups = "drop")


## 2. Build the "category" post-stratification cell (differs by survey)

## Tabulate each component variable before combining them
## USborn

diag_df <- survey_temp %>%
  group_by(USborn) %>%
  summarise(count = n(), .groups = "drop")

## Respondents with missing USborn are dropped
survey_temp <- filter(survey_temp, !is.na(USborn))

## CollGrad

diag_df <- survey_temp %>%
  group_by(CollGrad) %>%
  summarise(count = n(), .groups = "drop")

## sex

diag_df <- survey_temp %>%
  group_by(sex) %>%
  summarise(count = n(), .groups = "drop")

## agegroup

diag_df <- survey_temp %>%
  group_by(agegroup) %>%
  summarise(count = n(), .groups = "drop")

## origin (by survey)

diag_df <- survey_temp %>%
  group_by(origin, Survey) %>%
  summarise(count = n(), .groups = "drop")

## region, re-tabulated after the USborn filter

diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  summarise(count = n(), .groups = "drop")


## category: one string per respondent joining the cell variables with "-"
## (no region component for this survey).
## Dem: temporary 0/1 indicator (1 when Party is "Democrat", 0 otherwise,
## including missing Party) used in the weight comparison later on.

survey_temp <- survey_temp %>%
  mutate(
    category = paste(USborn, CollGrad, sex, agegroup, origin, sep = "-"),
    Dem = as.numeric(Party %in% "Democrat")
  )


## 3. Census benchmark: census_noregion cell counts for 2016

census_temp <- census_noregion %>%
  filter(year == 2016)

## 4. Reconcile post-stratification cells between census and survey.
## categories1 (census) and categories2 (survey) hold the distinct cells on
## each side; they are refreshed after every step for interactive inspection.

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Original note: census still has many more categories.
## Census cells that have no respondents in the survey (inspection only).
## Original note: some of these have very large WeightedN.

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Such cells cannot be used, so keep only census cells present in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Survey respondents whose cell is missing from the census.
## Original note: all shared (not re-verified here).

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Add each missing cell to the census with the smallest WeightedN found among
## the retained census cells. category_check holds one row per respondent, so
## distinct() is used to add every missing cell exactly once instead of once
## per respondent.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Reduce the census to the two columns needed as a population table

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Survey design under the weights provided with the survey
## (respondent id as the sampling unit, category cells as strata)

svy.oldweight <- svydesign(
  ids = ~id,
  strata = ~category,
  weights = ~Weight,
  data = survey_temp
)

## 6. Post-stratify the design to the census cell counts in census_temp

svy.ps <- postStratify(
  design = svy.oldweight,
  strata = ~category,
  population = census_temp,
  partial = TRUE
)

## Weights before and after post-stratification, read from the attributes of
## the post-stratum index stored on the design

ps_weights <- attr(svy.ps$postStrata[[1]], "weights")

lm_df <- tibble(
  oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
  psweight = ps_weights
)

## Quick check of how the provided weights relate to the new ones

lm1 <- lm(oldweight ~ psweight, data = lm_df)

summary(lm1)

## Append the post-stratified weights as "PSweights". The column is named in
## the cbind() call itself, so nothing depends on the name R would otherwise
## generate from the unnamed expression.

survey.ps <- cbind(survey_temp, PSweights = ps_weights)
survey.ps_pew2016 <- survey.ps

## Demographics under provided weights, post-stratified weights, and census.
## MeanAge and Dem have no census value here and are left NA.
## NOTE(review): the census row is computed from `census`, not from the
## 2016-specific census_temp -- confirm `census` is the intended benchmark.

demog <- data.frame(
  Weights = c("provided", "post-strat", "Census"),
  USBorn = c(
    as.numeric(svymean(~USborn, svy.oldweight)),
    as.numeric(svymean(~USborn, svy.ps)),
    sum(subset(census, USborn == 1)$WeightedN) / sum(census$WeightedN)
  ),
  MeanAge = c(
    as.numeric(svymean(~age, svy.oldweight)),
    as.numeric(svymean(~age, svy.ps)),
    NA
  ),
  CollGrad = c(
    as.numeric(svymean(~CollGrad, svy.oldweight)),
    as.numeric(svymean(~CollGrad, svy.ps)),
    sum(subset(census, education == "CollGrad")$WeightedN) /
      sum(census$WeightedN)
  ),
  Dem = c(
    as.numeric(svymean(~Dem, svy.oldweight)),
    as.numeric(svymean(~Dem, svy.ps)),
    NA
  ),
  stringsAsFactors = FALSE
)

print(demog)


```


```{r Pew 2018} 


## Pew 2018 -- per the original note ("As with 2016, no region"), region is
## left out of the category built below
## 1. Subset the pooled survey file to this survey-year

survey_temp <- filter(survey, Year == 2018, Survey %in% "Pew")

## diag_df is a scratch table that each tabulation below overwrites;
## inspect it interactively after running the step of interest.
## First: what region_pew contains for this survey.

diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  summarise(count = n(), .groups = "drop")


## 2. Build the "category" post-stratification cell (differs by survey)

## Tabulate each component variable before combining them
## USborn

diag_df <- survey_temp %>%
  group_by(USborn) %>%
  summarise(count = n(), .groups = "drop")

## Respondents with missing USborn are dropped
survey_temp <- filter(survey_temp, !is.na(USborn))

## CollGrad

diag_df <- survey_temp %>%
  group_by(CollGrad) %>%
  summarise(count = n(), .groups = "drop")

## sex

diag_df <- survey_temp %>%
  group_by(sex) %>%
  summarise(count = n(), .groups = "drop")

## agegroup

diag_df <- survey_temp %>%
  group_by(agegroup) %>%
  summarise(count = n(), .groups = "drop")

## origin (by survey)

diag_df <- survey_temp %>%
  group_by(origin, Survey) %>%
  summarise(count = n(), .groups = "drop")

## region, re-tabulated after the USborn filter

diag_df <- survey_temp %>%
  group_by(region_pew) %>%
  summarise(count = n(), .groups = "drop")


## category: one string per respondent joining the cell variables with "-"
## (no region component for this survey).
## Dem: temporary 0/1 indicator (1 when Party is "Democrat", 0 otherwise,
## including missing Party) used in the weight comparison later on.

survey_temp <- survey_temp %>%
  mutate(
    category = paste(USborn, CollGrad, sex, agegroup, origin, sep = "-"),
    Dem = as.numeric(Party %in% "Democrat")
  )


## 3. Census benchmark: census_noregion cell counts for 2018

census_temp <- census_noregion %>%
  filter(year == 2018)

## 4. Reconcile post-stratification cells between census and survey.
## categories1 (census) and categories2 (survey) hold the distinct cells on
## each side; they are refreshed after every step for interactive inspection.

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Original note: census still has many more categories.
## Census cells that have no respondents in the survey (inspection only).
## Original note: some of these have very large WeightedN.

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Such cells cannot be used, so keep only census cells present in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Survey respondents whose cell is missing from the census.
## Original note: all shared (not re-verified here).

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Add each missing cell to the census with the smallest WeightedN found among
## the retained census cells. category_check holds one row per respondent, so
## distinct() is used to add every missing cell exactly once instead of once
## per respondent.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Reduce the census to the two columns needed as a population table

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Survey design under the weights provided with the survey
## (respondent id as the sampling unit, category cells as strata)

svy.oldweight <- svydesign(
  ids = ~id,
  strata = ~category,
  weights = ~Weight,
  data = survey_temp
)

## 6. Post-stratify the design to the census cell counts in census_temp

svy.ps <- postStratify(
  design = svy.oldweight,
  strata = ~category,
  population = census_temp,
  partial = TRUE
)

## Weights before and after post-stratification, read from the attributes of
## the post-stratum index stored on the design

ps_weights <- attr(svy.ps$postStrata[[1]], "weights")

lm_df <- tibble(
  oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
  psweight = ps_weights
)

## Quick check of how the provided weights relate to the new ones

lm1 <- lm(oldweight ~ psweight, data = lm_df)

summary(lm1)

## Append the post-stratified weights as "PSweights". The column is named in
## the cbind() call itself, so nothing depends on the name R would otherwise
## generate from the unnamed expression.

survey.ps <- cbind(survey_temp, PSweights = ps_weights)
survey.ps_pew2018 <- survey.ps

## Demographics under provided weights, post-stratified weights, and census.
## MeanAge and Dem have no census value here and are left NA.
## NOTE(review): the census row is computed from `census`, not from the
## 2018-specific census_temp -- confirm `census` is the intended benchmark.

demog <- data.frame(
  Weights = c("provided", "post-strat", "Census"),
  USBorn = c(
    as.numeric(svymean(~USborn, svy.oldweight)),
    as.numeric(svymean(~USborn, svy.ps)),
    sum(subset(census, USborn == 1)$WeightedN) / sum(census$WeightedN)
  ),
  MeanAge = c(
    as.numeric(svymean(~age, svy.oldweight)),
    as.numeric(svymean(~age, svy.ps)),
    NA
  ),
  CollGrad = c(
    as.numeric(svymean(~CollGrad, svy.oldweight)),
    as.numeric(svymean(~CollGrad, svy.ps)),
    sum(subset(census, education == "CollGrad")$WeightedN) /
      sum(census$WeightedN)
  ),
  Dem = c(
    as.numeric(svymean(~Dem, svy.oldweight)),
    as.numeric(svymean(~Dem, svy.ps)),
    NA
  ),
  stringsAsFactors = FALSE
)

print(demog)



```


# CCES/CES 2008-2022

```{r CCES 2008}

## CES 2008 -- geography for this survey is stored in "region2"
## 1. Subset the pooled survey file to this survey-year

survey_temp <- filter(survey, Year == 2008, Survey %in% "CES")

## diag_df is a scratch table that each tabulation below overwrites;
## inspect it interactively after running the step of interest

diag_df <- survey_temp %>%
  group_by(region2) %>%
  summarise(count = n(), .groups = "drop")


## 2. Build the "category" post-stratification cell (differs by survey)

## Tabulate each component variable before combining them
## USborn

diag_df <- survey_temp %>%
  group_by(USborn) %>%
  summarise(count = n(), .groups = "drop")

## Respondents with missing USborn are dropped
survey_temp <- filter(survey_temp, !is.na(USborn))

## CollGrad

diag_df <- survey_temp %>%
  group_by(CollGrad) %>%
  summarise(count = n(), .groups = "drop")

## sex

diag_df <- survey_temp %>%
  group_by(sex) %>%
  summarise(count = n(), .groups = "drop")

## agegroup

diag_df <- survey_temp %>%
  group_by(agegroup) %>%
  summarise(count = n(), .groups = "drop")

## origin (by survey) -- original note: no origin for this survey

diag_df <- survey_temp %>%
  group_by(origin, Survey) %>%
  summarise(count = n(), .groups = "drop")

## region, re-tabulated after the USborn filter

diag_df <- survey_temp %>%
  group_by(region2) %>%
  summarise(count = n(), .groups = "drop")


## category: one string per respondent joining the cell variables with "-"
## (no origin component for this survey).
## Dem: temporary 0/1 indicator (1 when Party is "Democrat", 0 otherwise,
## including missing Party) used in the weight comparison later on.

survey_temp <- survey_temp %>%
  mutate(
    category = paste(region2, USborn, CollGrad, sex, agegroup, sep = "-"),
    Dem = as.numeric(Party %in% "Democrat")
  )


## 3. Census benchmark: census_region2_noorigin cell counts for 2008
## (cells defined without origin, matching the survey category)

census_temp <- census_region2_noorigin %>%
  filter(year == 2008)

## 4. Reconcile post-stratification cells between census and survey.
## categories1 (census) and categories2 (survey) hold the distinct cells on
## each side; they are refreshed after every step for interactive inspection.

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Original note: census still has many more categories.
## Census cells that have no respondents in the survey (inspection only).
## Original note: some of these have very large WeightedN.

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Such cells cannot be used, so keep only census cells present in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Survey respondents whose cell is missing from the census.
## Original note: all shared (not re-verified here).

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Add each missing cell to the census with the smallest WeightedN found among
## the retained census cells. category_check holds one row per respondent, so
## distinct() is used to add every missing cell exactly once instead of once
## per respondent.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Reduce the census to the two columns needed as a population table

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first

svy.oldweight <- svydesign(ids=~id, weights=~Weight, strata = ~category, data=survey_temp)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(design = svy.oldweight, strata = ~category, population = census_temp, partial = TRUE)


lm_df <- tibble(oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
                psweight = attr(svy.ps$postStrata[[1]], "weights")
)

lm1 <- lm(data = lm_df,
          oldweight ~ psweight)

summary(lm1)



survey.ps <- cbind(survey_temp, attr(svy.ps$postStrata[[1]], "weights"))
survey.ps_ces2008 <- survey.ps %>%
  rename(PSweights = `attr(svy.ps$postStrata[[1]], "weights")`)


demog <- as.data.frame(c("provided","post-strat","Census"), stringsAsFactors=FALSE)
names(demog) <- "Weights"
demog$USBorn <- c(svymean(~USborn, svy.oldweight), svymean(~USborn, svy.ps), sum(subset(census, USborn == 1)$WeightedN)/sum(census$WeightedN))
demog$MeanAge[1] <- svymean(~age, svy.oldweight)
demog$MeanAge[2] <- svymean(~age, svy.ps)
demog$MeanAge[3] <- NA
demog$CollGrad <- c(svymean(~CollGrad, svy.oldweight), svymean(~CollGrad, svy.ps), sum(subset(census, education == "CollGrad")$WeightedN)/sum(census$WeightedN))
demog$Dem[1] <- svymean(~Dem, svy.oldweight)
demog$Dem[2] <- svymean(~Dem, svy.ps)
demog$Dem[3] <- NA

print(demog)


```


```{r CCES 2009}

## State/region is under "region2"
## Post-stratify the 2009 CES respondents to the 2009 Census cells, where a
## cell ("category") is region2 x USborn x CollGrad x sex x agegroup.

## 1. Load temp survey file

survey_temp <- survey %>%
  filter(Year == 2009, Survey %in% c("CES"))

## Respondents per region, before any rows are dropped
diag_df <- survey_temp %>%
  count(region2, name = "count")


## 2. Make "category" variable (different for each survey)

## Check variables
## USBorn

diag_df <- survey_temp %>%
  count(USborn, name = "count")

## A respondent with missing USborn cannot be placed in a cell
survey_temp <- survey_temp %>%
  filter(!is.na(USborn))

## CollGrad

diag_df <- survey_temp %>%
  count(CollGrad, name = "count")

## sex

diag_df <- survey_temp %>%
  count(sex, name = "count")

## agegroup

diag_df <- survey_temp %>%
  count(agegroup, name = "count")

## origin
## No origin (it is tabulated here but not used in the category below)

diag_df <- survey_temp %>%
  count(origin, Survey, name = "count")

## Region again, now that rows with missing USborn are gone

diag_df <- survey_temp %>%
  count(region2, name = "count")


## Make category

survey_temp <- survey_temp %>%
  mutate(category = paste(region2, USborn, CollGrad, sex, agegroup,
                          sep = "-"))

## Make "Dem" variable for now to compare
## (a missing Party is coded 0, the same as any non-Democrat answer)

survey_temp$Dem <- as.numeric(survey_temp$Party %in% "Democrat")


## 3. Load temp census files

## Using 2009 Census
## No origin

census_temp <- census_region2_noorigin %>%
  filter(year == 2009)

## 4. Checking that categories are shared across; if not, create new

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Census still has many more categories

## First, check to see which ones are not shared:

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Some categories have very large WeightedN but are not present in survey
## We do what we can: restrict Census to just those in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## All shared

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Create new categories in census and assign minimum WeightedN.
## category_check has one row per respondent, so collapse it to one row per
## category first; otherwise the population table gets a repeated row for
## every extra respondent in a missing cell.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first

svy.oldweight <- svydesign(ids = ~id,
                           weights = ~Weight,
                           strata = ~category,
                           data = survey_temp)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(design = svy.oldweight,
                       strata = ~category,
                       population = census_temp,
                       partial = TRUE)

## The weights before and after adjustment are read from the attributes of
## the post-stratum index stored on the design
ps_index <- svy.ps$postStrata[[1]]

lm_df <- tibble(oldweight = attr(ps_index, "oldweights"),
                psweight = attr(ps_index, "weights"))

## How closely do the provided weights track the post-stratified ones?
lm1 <- lm(data = lm_df,
          oldweight ~ psweight)

summary(lm1)


## Attach the post-stratified weights under an explicit column name instead
## of renaming the deparsed expression that an unnamed cbind() generates

survey.ps <- cbind(survey_temp, PSweights = attr(ps_index, "weights"))
survey.ps_ces2009 <- survey.ps


## Compare provided weights, post-stratified weights and the Census.
## NOTE(review): the Census row reads `census`, which this chunk does not
## create, rather than `census_temp` -- confirm it holds the intended year.

demog <- data.frame(Weights = c("provided", "post-strat", "Census"),
                    stringsAsFactors = FALSE)
demog$USBorn <- c(svymean(~USborn, svy.oldweight),
                  svymean(~USborn, svy.ps),
                  sum(subset(census, USborn == 1)$WeightedN) /
                    sum(census$WeightedN))
demog$MeanAge <- c(svymean(~age, svy.oldweight),
                   svymean(~age, svy.ps),
                   NA)
demog$CollGrad <- c(svymean(~CollGrad, svy.oldweight),
                    svymean(~CollGrad, svy.ps),
                    sum(subset(census, education == "CollGrad")$WeightedN) /
                      sum(census$WeightedN))
demog$Dem <- c(svymean(~Dem, svy.oldweight),
               svymean(~Dem, svy.ps),
               NA)

print(demog)



```

```{r CCES 2010}

## State/region is under "region2"
## Post-stratify the 2010 CES respondents to the 2010 Census cells, where a
## cell ("category") is region2 x USborn x CollGrad x sex x agegroup.

## 1. Load temp survey file

survey_temp <- survey %>%
  filter(Year == 2010, Survey %in% c("CES"))

## Respondents per region, before any rows are dropped
diag_df <- survey_temp %>%
  count(region2, name = "count")


## 2. Make "category" variable (different for each survey)

## Check variables
## USBorn

diag_df <- survey_temp %>%
  count(USborn, name = "count")

## A respondent with missing USborn cannot be placed in a cell
survey_temp <- survey_temp %>%
  filter(!is.na(USborn))

## CollGrad

diag_df <- survey_temp %>%
  count(CollGrad, name = "count")

## sex

diag_df <- survey_temp %>%
  count(sex, name = "count")

## agegroup

diag_df <- survey_temp %>%
  count(agegroup, name = "count")

## origin
## No origin (it is tabulated here but not used in the category below)

diag_df <- survey_temp %>%
  count(origin, Survey, name = "count")

## Region again, now that rows with missing USborn are gone

diag_df <- survey_temp %>%
  count(region2, name = "count")


## Make category

survey_temp <- survey_temp %>%
  mutate(category = paste(region2, USborn, CollGrad, sex, agegroup,
                          sep = "-"))

## Make "Dem" variable for now to compare
## (a missing Party is coded 0, the same as any non-Democrat answer)

survey_temp$Dem <- as.numeric(survey_temp$Party %in% "Democrat")


## 3. Load temp census files

## Using 2010 Census
## No origin

census_temp <- census_region2_noorigin %>%
  filter(year == 2010)

## 4. Checking that categories are shared across; if not, create new

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Census still has many more categories

## First, check to see which ones are not shared:

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Some categories have very large WeightedN but are not present in survey
## We do what we can: restrict Census to just those in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## 2 not shared

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Create new categories in census and assign minimum WeightedN.
## category_check has one row per respondent, so collapse it to one row per
## category first; otherwise the population table gets a repeated row for
## every extra respondent in a missing cell.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first

svy.oldweight <- svydesign(ids = ~id,
                           weights = ~Weight,
                           strata = ~category,
                           data = survey_temp)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(design = svy.oldweight,
                       strata = ~category,
                       population = census_temp,
                       partial = TRUE)

## The weights before and after adjustment are read from the attributes of
## the post-stratum index stored on the design
ps_index <- svy.ps$postStrata[[1]]

lm_df <- tibble(oldweight = attr(ps_index, "oldweights"),
                psweight = attr(ps_index, "weights"))

## How closely do the provided weights track the post-stratified ones?
lm1 <- lm(data = lm_df,
          oldweight ~ psweight)

summary(lm1)


## Attach the post-stratified weights under an explicit column name instead
## of renaming the deparsed expression that an unnamed cbind() generates

survey.ps <- cbind(survey_temp, PSweights = attr(ps_index, "weights"))
survey.ps_ces2010 <- survey.ps


## Compare provided weights, post-stratified weights and the Census.
## NOTE(review): the Census row reads `census`, which this chunk does not
## create, rather than `census_temp` -- confirm it holds the intended year.

demog <- data.frame(Weights = c("provided", "post-strat", "Census"),
                    stringsAsFactors = FALSE)
demog$USBorn <- c(svymean(~USborn, svy.oldweight),
                  svymean(~USborn, svy.ps),
                  sum(subset(census, USborn == 1)$WeightedN) /
                    sum(census$WeightedN))
demog$MeanAge <- c(svymean(~age, svy.oldweight),
                   svymean(~age, svy.ps),
                   NA)
demog$CollGrad <- c(svymean(~CollGrad, svy.oldweight),
                    svymean(~CollGrad, svy.ps),
                    sum(subset(census, education == "CollGrad")$WeightedN) /
                      sum(census$WeightedN))
demog$Dem <- c(svymean(~Dem, svy.oldweight),
               svymean(~Dem, svy.ps),
               NA)

print(demog)


```



```{r CCES 2011}

## State/region is under "region2"
## Post-stratify the 2011 CES respondents to the 2011 Census cells, where a
## cell ("category") is region2 x USborn x CollGrad x sex x agegroup.

## 1. Load temp survey file

survey_temp <- survey %>%
  filter(Year == 2011, Survey %in% c("CES"))

## Respondents per region, before any rows are dropped
diag_df <- survey_temp %>%
  count(region2, name = "count")


## 2. Make "category" variable (different for each survey)

## Check variables
## USBorn

diag_df <- survey_temp %>%
  count(USborn, name = "count")

## A respondent with missing USborn cannot be placed in a cell
survey_temp <- survey_temp %>%
  filter(!is.na(USborn))

## CollGrad

diag_df <- survey_temp %>%
  count(CollGrad, name = "count")

## sex

diag_df <- survey_temp %>%
  count(sex, name = "count")

## agegroup

diag_df <- survey_temp %>%
  count(agegroup, name = "count")

## origin
## No origin (it is tabulated here but not used in the category below)

diag_df <- survey_temp %>%
  count(origin, Survey, name = "count")

## Region again, now that rows with missing USborn are gone

diag_df <- survey_temp %>%
  count(region2, name = "count")


## Make category

survey_temp <- survey_temp %>%
  mutate(category = paste(region2, USborn, CollGrad, sex, agegroup,
                          sep = "-"))

## Make "Dem" variable for now to compare
## (a missing Party is coded 0, the same as any non-Democrat answer)

survey_temp$Dem <- as.numeric(survey_temp$Party %in% "Democrat")


## 3. Load temp census files

## Using 2011 Census
## No origin

census_temp <- census_region2_noorigin %>%
  filter(year == 2011)

## 4. Checking that categories are shared across; if not, create new

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Census still has many more categories

## First, check to see which ones are not shared:

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Some categories have very large WeightedN but are not present in survey
## We do what we can: restrict Census to just those in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## All shared

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Create new categories in census and assign minimum WeightedN.
## category_check has one row per respondent, so collapse it to one row per
## category first; otherwise the population table gets a repeated row for
## every extra respondent in a missing cell.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first

svy.oldweight <- svydesign(ids = ~id,
                           weights = ~Weight,
                           strata = ~category,
                           data = survey_temp)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(design = svy.oldweight,
                       strata = ~category,
                       population = census_temp,
                       partial = TRUE)

## The weights before and after adjustment are read from the attributes of
## the post-stratum index stored on the design
ps_index <- svy.ps$postStrata[[1]]

lm_df <- tibble(oldweight = attr(ps_index, "oldweights"),
                psweight = attr(ps_index, "weights"))

## How closely do the provided weights track the post-stratified ones?
lm1 <- lm(data = lm_df,
          oldweight ~ psweight)

summary(lm1)


## Attach the post-stratified weights under an explicit column name instead
## of renaming the deparsed expression that an unnamed cbind() generates

survey.ps <- cbind(survey_temp, PSweights = attr(ps_index, "weights"))
survey.ps_ces2011 <- survey.ps


## Compare provided weights, post-stratified weights and the Census.
## NOTE(review): the Census row reads `census`, which this chunk does not
## create, rather than `census_temp` -- confirm it holds the intended year.

demog <- data.frame(Weights = c("provided", "post-strat", "Census"),
                    stringsAsFactors = FALSE)
demog$USBorn <- c(svymean(~USborn, svy.oldweight),
                  svymean(~USborn, svy.ps),
                  sum(subset(census, USborn == 1)$WeightedN) /
                    sum(census$WeightedN))
demog$MeanAge <- c(svymean(~age, svy.oldweight),
                   svymean(~age, svy.ps),
                   NA)
demog$CollGrad <- c(svymean(~CollGrad, svy.oldweight),
                    svymean(~CollGrad, svy.ps),
                    sum(subset(census, education == "CollGrad")$WeightedN) /
                      sum(census$WeightedN))
demog$Dem <- c(svymean(~Dem, svy.oldweight),
               svymean(~Dem, svy.ps),
               NA)

print(demog)


```

```{r CCES 2012}
## State/region is under "region2"
## Post-stratify the 2012 CES respondents to the 2012 Census cells, where a
## cell ("category") is region2 x USborn x CollGrad x sex x agegroup.

## 1. Load temp survey file

survey_temp <- survey %>%
  filter(Year == 2012, Survey %in% c("CES"))

## Respondents per region, before any rows are dropped
diag_df <- survey_temp %>%
  count(region2, name = "count")


## 2. Make "category" variable (different for each survey)

## Check variables
## USBorn

diag_df <- survey_temp %>%
  count(USborn, name = "count")

## A respondent with missing USborn cannot be placed in a cell
survey_temp <- survey_temp %>%
  filter(!is.na(USborn))

## CollGrad

diag_df <- survey_temp %>%
  count(CollGrad, name = "count")

## sex

diag_df <- survey_temp %>%
  count(sex, name = "count")

## agegroup

diag_df <- survey_temp %>%
  count(agegroup, name = "count")

## origin
## No origin (it is tabulated here but not used in the category below)

diag_df <- survey_temp %>%
  count(origin, Survey, name = "count")

## Region again, now that rows with missing USborn are gone

diag_df <- survey_temp %>%
  count(region2, name = "count")


## Make category

survey_temp <- survey_temp %>%
  mutate(category = paste(region2, USborn, CollGrad, sex, agegroup,
                          sep = "-"))

## Make "Dem" variable for now to compare
## (a missing Party is coded 0, the same as any non-Democrat answer)

survey_temp$Dem <- as.numeric(survey_temp$Party %in% "Democrat")


## 3. Load temp census files

## Using 2012 Census
## No origin

census_temp <- census_region2_noorigin %>%
  filter(year == 2012)

## 4. Checking that categories are shared across; if not, create new

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Census still has many more categories

## First, check to see which ones are not shared:

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Some categories have very large WeightedN but are not present in survey
## We do what we can: restrict Census to just those in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## All shared

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Create new categories in census and assign minimum WeightedN.
## category_check has one row per respondent, so collapse it to one row per
## category first; otherwise the population table gets a repeated row for
## every extra respondent in a missing cell.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first

svy.oldweight <- svydesign(ids = ~id,
                           weights = ~Weight,
                           strata = ~category,
                           data = survey_temp)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(design = svy.oldweight,
                       strata = ~category,
                       population = census_temp,
                       partial = TRUE)

## The weights before and after adjustment are read from the attributes of
## the post-stratum index stored on the design
ps_index <- svy.ps$postStrata[[1]]

lm_df <- tibble(oldweight = attr(ps_index, "oldweights"),
                psweight = attr(ps_index, "weights"))

## How closely do the provided weights track the post-stratified ones?
lm1 <- lm(data = lm_df,
          oldweight ~ psweight)

summary(lm1)


## Attach the post-stratified weights under an explicit column name instead
## of renaming the deparsed expression that an unnamed cbind() generates

survey.ps <- cbind(survey_temp, PSweights = attr(ps_index, "weights"))
survey.ps_ces2012 <- survey.ps


## Compare provided weights, post-stratified weights and the Census.
## NOTE(review): the Census row reads `census`, which this chunk does not
## create, rather than `census_temp` -- confirm it holds the intended year.

demog <- data.frame(Weights = c("provided", "post-strat", "Census"),
                    stringsAsFactors = FALSE)
demog$USBorn <- c(svymean(~USborn, svy.oldweight),
                  svymean(~USborn, svy.ps),
                  sum(subset(census, USborn == 1)$WeightedN) /
                    sum(census$WeightedN))
demog$MeanAge <- c(svymean(~age, svy.oldweight),
                   svymean(~age, svy.ps),
                   NA)
demog$CollGrad <- c(svymean(~CollGrad, svy.oldweight),
                    svymean(~CollGrad, svy.ps),
                    sum(subset(census, education == "CollGrad")$WeightedN) /
                      sum(census$WeightedN))
demog$Dem <- c(svymean(~Dem, svy.oldweight),
               svymean(~Dem, svy.ps),
               NA)

print(demog)



```


```{r CCES 2013}

## State/region is under "region2"
## Post-stratify the 2013 CES respondents to the 2013 Census cells, where a
## cell ("category") is region2 x USborn x CollGrad x sex x agegroup.

## 1. Load temp survey file

survey_temp <- survey %>%
  filter(Year == 2013, Survey %in% c("CES"))

## Respondents per region, before any rows are dropped
diag_df <- survey_temp %>%
  count(region2, name = "count")


## 2. Make "category" variable (different for each survey)

## Check variables
## USBorn

diag_df <- survey_temp %>%
  count(USborn, name = "count")

## A respondent with missing USborn cannot be placed in a cell
survey_temp <- survey_temp %>%
  filter(!is.na(USborn))

## CollGrad

diag_df <- survey_temp %>%
  count(CollGrad, name = "count")

## sex

diag_df <- survey_temp %>%
  count(sex, name = "count")

## agegroup

diag_df <- survey_temp %>%
  count(agegroup, name = "count")

## origin
## No origin (it is tabulated here but not used in the category below)

diag_df <- survey_temp %>%
  count(origin, Survey, name = "count")

## Region again, now that rows with missing USborn are gone

diag_df <- survey_temp %>%
  count(region2, name = "count")


## Make category

survey_temp <- survey_temp %>%
  mutate(category = paste(region2, USborn, CollGrad, sex, agegroup,
                          sep = "-"))

## Make "Dem" variable for now to compare
## (a missing Party is coded 0, the same as any non-Democrat answer)

survey_temp$Dem <- as.numeric(survey_temp$Party %in% "Democrat")


## 3. Load temp census files

## Using 2013 Census
## No origin

census_temp <- census_region2_noorigin %>%
  filter(year == 2013)

## 4. Checking that categories are shared across; if not, create new

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Census still has many more categories

## First, check to see which ones are not shared:

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Some categories have very large WeightedN but are not present in survey
## We do what we can: restrict Census to just those in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## All shared

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Create new categories in census and assign minimum WeightedN.
## category_check has one row per respondent, so collapse it to one row per
## category first; otherwise the population table gets a repeated row for
## every extra respondent in a missing cell.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first

svy.oldweight <- svydesign(ids = ~id,
                           weights = ~Weight,
                           strata = ~category,
                           data = survey_temp)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(design = svy.oldweight,
                       strata = ~category,
                       population = census_temp,
                       partial = TRUE)

## The weights before and after adjustment are read from the attributes of
## the post-stratum index stored on the design
ps_index <- svy.ps$postStrata[[1]]

lm_df <- tibble(oldweight = attr(ps_index, "oldweights"),
                psweight = attr(ps_index, "weights"))

## How closely do the provided weights track the post-stratified ones?
lm1 <- lm(data = lm_df,
          oldweight ~ psweight)

summary(lm1)


## Attach the post-stratified weights under an explicit column name instead
## of renaming the deparsed expression that an unnamed cbind() generates

survey.ps <- cbind(survey_temp, PSweights = attr(ps_index, "weights"))
survey.ps_ces2013 <- survey.ps


## Compare provided weights, post-stratified weights and the Census.
## NOTE(review): the Census row reads `census`, which this chunk does not
## create, rather than `census_temp` -- confirm it holds the intended year.

demog <- data.frame(Weights = c("provided", "post-strat", "Census"),
                    stringsAsFactors = FALSE)
demog$USBorn <- c(svymean(~USborn, svy.oldweight),
                  svymean(~USborn, svy.ps),
                  sum(subset(census, USborn == 1)$WeightedN) /
                    sum(census$WeightedN))
demog$MeanAge <- c(svymean(~age, svy.oldweight),
                   svymean(~age, svy.ps),
                   NA)
demog$CollGrad <- c(svymean(~CollGrad, svy.oldweight),
                    svymean(~CollGrad, svy.ps),
                    sum(subset(census, education == "CollGrad")$WeightedN) /
                      sum(census$WeightedN))
demog$Dem <- c(svymean(~Dem, svy.oldweight),
               svymean(~Dem, svy.ps),
               NA)

print(demog)




```


```{r CCES 2014}

## State/region is under "region2"
## Post-stratify the 2014 CES respondents to the 2014 Census cells, where a
## cell ("category") is region2 x USborn x CollGrad x sex x agegroup.

## 1. Load temp survey file

survey_temp <- survey %>%
  filter(Year == 2014, Survey %in% c("CES"))

## Respondents per region, before any rows are dropped
diag_df <- survey_temp %>%
  count(region2, name = "count")


## 2. Make "category" variable (different for each survey)

## Check variables
## USBorn

diag_df <- survey_temp %>%
  count(USborn, name = "count")

## A respondent with missing USborn cannot be placed in a cell
survey_temp <- survey_temp %>%
  filter(!is.na(USborn))

## CollGrad

diag_df <- survey_temp %>%
  count(CollGrad, name = "count")

## sex

diag_df <- survey_temp %>%
  count(sex, name = "count")

## agegroup

diag_df <- survey_temp %>%
  count(agegroup, name = "count")

## origin
## No origin (it is tabulated here but not used in the category below)

diag_df <- survey_temp %>%
  count(origin, Survey, name = "count")

## Region again, now that rows with missing USborn are gone

diag_df <- survey_temp %>%
  count(region2, name = "count")


## Make category

survey_temp <- survey_temp %>%
  mutate(category = paste(region2, USborn, CollGrad, sex, agegroup,
                          sep = "-"))

## Make "Dem" variable for now to compare
## (a missing Party is coded 0, the same as any non-Democrat answer)

survey_temp$Dem <- as.numeric(survey_temp$Party %in% "Democrat")


## 3. Load temp census files

## Using 2014 Census
## No origin

census_temp <- census_region2_noorigin %>%
  filter(year == 2014)

## 4. Checking that categories are shared across; if not, create new

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Census still has many more categories

## First, check to see which ones are not shared:

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Some categories have very large WeightedN but are not present in survey
## We do what we can: restrict Census to just those in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## All shared

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Create new categories in census and assign minimum WeightedN.
## category_check has one row per respondent, so collapse it to one row per
## category first; otherwise the population table gets a repeated row for
## every extra respondent in a missing cell.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first

svy.oldweight <- svydesign(ids = ~id,
                           weights = ~Weight,
                           strata = ~category,
                           data = survey_temp)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(design = svy.oldweight,
                       strata = ~category,
                       population = census_temp,
                       partial = TRUE)

## The weights before and after adjustment are read from the attributes of
## the post-stratum index stored on the design
ps_index <- svy.ps$postStrata[[1]]

lm_df <- tibble(oldweight = attr(ps_index, "oldweights"),
                psweight = attr(ps_index, "weights"))

## How closely do the provided weights track the post-stratified ones?
lm1 <- lm(data = lm_df,
          oldweight ~ psweight)

summary(lm1)


## Attach the post-stratified weights under an explicit column name instead
## of renaming the deparsed expression that an unnamed cbind() generates

survey.ps <- cbind(survey_temp, PSweights = attr(ps_index, "weights"))
survey.ps_ces2014 <- survey.ps


## Compare provided weights, post-stratified weights and the Census.
## NOTE(review): the Census row reads `census`, which this chunk does not
## create, rather than `census_temp` -- confirm it holds the intended year.

demog <- data.frame(Weights = c("provided", "post-strat", "Census"),
                    stringsAsFactors = FALSE)
demog$USBorn <- c(svymean(~USborn, svy.oldweight),
                  svymean(~USborn, svy.ps),
                  sum(subset(census, USborn == 1)$WeightedN) /
                    sum(census$WeightedN))
demog$MeanAge <- c(svymean(~age, svy.oldweight),
                   svymean(~age, svy.ps),
                   NA)
demog$CollGrad <- c(svymean(~CollGrad, svy.oldweight),
                    svymean(~CollGrad, svy.ps),
                    sum(subset(census, education == "CollGrad")$WeightedN) /
                      sum(census$WeightedN))
demog$Dem <- c(svymean(~Dem, svy.oldweight),
               svymean(~Dem, svy.ps),
               NA)

print(demog)



```



```{r CCES 2015}

## State/region is under "region2"
## Post-stratify the 2015 CES respondents to the 2015 Census cells. Unlike
## the neighbouring years, USborn is left out of the cell definition here:
## a cell ("category") is region2 x CollGrad x sex x agegroup.

## 1. Load temp survey file

survey_temp <- survey %>%
  filter(Year == 2015, Survey %in% c("CES"))

## Respondents per region
diag_df <- survey_temp %>%
  count(region2, name = "count")


## 2. Make "category" variable (different for each survey)

## Check variables
## USBorn

## No "USBorn" variable either

diag_df <- survey_temp %>%
  count(USborn, name = "count")

## Rows with missing USborn are deliberately kept for this year, because
## USborn is not part of the category built below.

## CollGrad

diag_df <- survey_temp %>%
  count(CollGrad, name = "count")

## sex

diag_df <- survey_temp %>%
  count(sex, name = "count")

## agegroup

diag_df <- survey_temp %>%
  count(agegroup, name = "count")

## origin
## No origin (it is tabulated here but not used in the category below)

diag_df <- survey_temp %>%
  count(origin, Survey, name = "count")

## First, checking region:

diag_df <- survey_temp %>%
  count(region2, name = "count")


## Make category

survey_temp <- survey_temp %>%
  mutate(category = paste(region2, CollGrad, sex, agegroup,
                          sep = "-"))

## Make "Dem" variable for now to compare
## (a missing Party is coded 0, the same as any non-Democrat answer)

survey_temp$Dem <- as.numeric(survey_temp$Party %in% "Democrat")


## 3. Load temp census files

## Using 2015 Census
## No origin, no US born

census_temp <- census_region2_noorigin_nousborn %>%
  filter(year == 2015)

## 4. Checking that categories are shared across; if not, create new

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Census still has many more categories

## First, check to see which ones are not shared:

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Some categories have very large WeightedN but are not present in survey
## We do what we can: restrict Census to just those in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## All shared

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Create new categories in census and assign minimum WeightedN.
## category_check has one row per respondent, so collapse it to one row per
## category first; otherwise the population table gets a repeated row for
## every extra respondent in a missing cell.

category_merge <- category_check %>%
  distinct(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe

census_temp <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

census_temp <- rbind(census_temp, category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first

svy.oldweight <- svydesign(ids = ~id,
                           weights = ~Weight,
                           strata = ~category,
                           data = survey_temp)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(design = svy.oldweight,
                       strata = ~category,
                       population = census_temp,
                       partial = TRUE)

## The weights before and after adjustment are read from the attributes of
## the post-stratum index stored on the design
ps_index <- svy.ps$postStrata[[1]]

lm_df <- tibble(oldweight = attr(ps_index, "oldweights"),
                psweight = attr(ps_index, "weights"))

## How closely do the provided weights track the post-stratified ones?
lm1 <- lm(data = lm_df,
          oldweight ~ psweight)

summary(lm1)


## Attach the post-stratified weights under an explicit column name instead
## of renaming the deparsed expression that an unnamed cbind() generates

survey.ps <- cbind(survey_temp, PSweights = attr(ps_index, "weights"))
survey.ps_ces2015 <- survey.ps


## Compare provided weights, post-stratified weights and the Census.
## NOTE(review): the Census row reads `census`, which this chunk does not
## create, rather than `census_temp` -- confirm it holds the intended year.
## NOTE(review): USborn is reported here although it was left out of the
## category for this year; the survey entries are presumably NA -- confirm.

demog <- data.frame(Weights = c("provided", "post-strat", "Census"),
                    stringsAsFactors = FALSE)
demog$USBorn <- c(svymean(~USborn, svy.oldweight),
                  svymean(~USborn, svy.ps),
                  sum(subset(census, USborn == 1)$WeightedN) /
                    sum(census$WeightedN))
demog$MeanAge <- c(svymean(~age, svy.oldweight),
                   svymean(~age, svy.ps),
                   NA)
demog$CollGrad <- c(svymean(~CollGrad, svy.oldweight),
                    svymean(~CollGrad, svy.ps),
                    sum(subset(census, education == "CollGrad")$WeightedN) /
                      sum(census$WeightedN))
demog$Dem <- c(svymean(~Dem, svy.oldweight),
               svymean(~Dem, svy.ps),
               NA)

print(demog)


```

```{r CCES 2016}

## State/region is stored in "region2"
## 1. Subset `survey` to the 2016 CES respondents

survey_temp <- filter(survey, Year == 2016, Survey %in% "CES")

## Respondents per region
diag_df <- count(survey_temp, region2, name = "count")


## 2. Build the "category" variable (its inputs differ by survey)

## Tabulate each input first; diag_df is overwritten at every step, so
## inspect it interactively

## USborn
diag_df <- count(survey_temp, USborn, name = "count")

## Respondents with missing USborn are dropped before building categories
survey_temp <- filter(survey_temp, !is.na(USborn))

## CollGrad
diag_df <- count(survey_temp, CollGrad, name = "count")

## sex
diag_df <- count(survey_temp, sex, name = "count")

## agegroup
diag_df <- count(survey_temp, agegroup, name = "count")

## origin (by survey)
diag_df <- count(survey_temp, origin, Survey, name = "count")

## region again, now that missing USborn has been dropped
diag_df <- count(survey_temp, region2, name = "count")

## category: region2-USborn-CollGrad-sex-agegroup-origin
## Dem: temporary comparison variable, 1 when Party is "Democrat", else 0
survey_temp <- survey_temp %>%
  mutate(
    category = paste(region2, USborn, CollGrad, sex, agegroup, origin, sep = "-"),
    Dem = as.numeric(Party %in% "Democrat")
  )


## 3. Census population counts for the survey year

## 2016 census rows (these now carry origin and USborn)
census_temp <- filter(census_region2, year == 2016)

## 4. Reconcile the category sets of census and survey

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## The census still has many more categories than the survey.
## Census categories with no survey respondent:
category_check <- filter(census_temp, !(category %in% survey_temp$category))

## Some of them have a very large WeightedN, but nothing in the survey can be
## weighted up to them, so keep only the census categories seen in the survey
census_temp <- filter(census_temp, category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## 17 not shared
## Survey respondents whose category has no census row:
category_check <- filter(survey_temp, !(category %in% census_temp$category))

## Give those categories a census entry carrying the minimum WeightedN
category_merge <- category_check %>%
  select(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe
## It is stored as census_merge (as in the CES 2022/2023 chunks) so that
## census_temp keeps its demographic columns for the Census row of `demog`.

census_merge <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

## Append the survey-only categories created above
census_merge <- rbind(census_merge, category_merge)

categories1 <- tibble(unique(census_merge$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first (design with the provided weights)

svy.oldweight <- svydesign(
  ids = ~id,
  weights = ~Weight,
  strata = ~category,
  data = survey_temp
)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(
  design = svy.oldweight,
  strata = ~category,
  population = census_merge,
  partial = TRUE
)

## Regress provided weights on post-stratified weights to see how far the
## adjustment moved them
lm_df <- tibble(
  oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
  psweight = attr(svy.ps$postStrata[[1]], "weights")
)

lm1 <- lm(oldweight ~ psweight, data = lm_df)

summary(lm1)


## Attach the post-stratified weights to the respondent-level data
survey.ps <- cbind(survey_temp, attr(svy.ps$postStrata[[1]], "weights"))
survey.ps_ces2016 <- survey.ps %>%
  rename(PSweights = `attr(svy.ps$postStrata[[1]], "weights")`)


## Compare weighted demographics: provided weights, post-stratified weights,
## and the census benchmark. The benchmark uses this chunk's year-specific
## census_temp (already restricted to categories present in the survey)
## instead of the global `census`, matching the CES 2022/2023 chunks.
## NOTE(review): assumes census_region2 carries USborn and CollGrad columns
## coded 1 for US-born / college graduate, as those chunks rely on -- confirm.
demog <- data.frame(
  Weights = c("provided", "post-strat", "Census"),
  stringsAsFactors = FALSE
)
demog$USBorn <- c(
  svymean(~USborn, svy.oldweight),
  svymean(~USborn, svy.ps),
  sum(filter(census_temp, USborn == 1)$WeightedN) / sum(census_temp$WeightedN)
)
## No census benchmark for mean age
demog$MeanAge <- unname(c(svymean(~age, svy.oldweight), svymean(~age, svy.ps), NA))
demog$CollGrad <- c(
  svymean(~CollGrad, svy.oldweight),
  svymean(~CollGrad, svy.ps),
  sum(filter(census_temp, CollGrad == 1)$WeightedN) / sum(census_temp$WeightedN)
)
## No census benchmark for party
demog$Dem <- unname(c(svymean(~Dem, svy.oldweight), svymean(~Dem, svy.ps), NA))

print(demog)




```


```{r CCES 2017}

## State/region is stored in "region2"
## 1. Subset `survey` to the 2017 CES respondents

survey_temp <- filter(survey, Year == 2017, Survey %in% "CES")

## Respondents per region
diag_df <- count(survey_temp, region2, name = "count")


## 2. Build the "category" variable (its inputs differ by survey)

## Tabulate each input first; diag_df is overwritten at every step, so
## inspect it interactively

## USborn
diag_df <- count(survey_temp, USborn, name = "count")

## Respondents with missing USborn are dropped before building categories
survey_temp <- filter(survey_temp, !is.na(USborn))

## CollGrad
diag_df <- count(survey_temp, CollGrad, name = "count")

## sex
diag_df <- count(survey_temp, sex, name = "count")

## agegroup
diag_df <- count(survey_temp, agegroup, name = "count")

## origin (by survey)
diag_df <- count(survey_temp, origin, Survey, name = "count")

## region again, now that missing USborn has been dropped
diag_df <- count(survey_temp, region2, name = "count")

## category: region2-USborn-CollGrad-sex-agegroup-origin
## Dem: temporary comparison variable, 1 when Party is "Democrat", else 0
survey_temp <- survey_temp %>%
  mutate(
    category = paste(region2, USborn, CollGrad, sex, agegroup, origin, sep = "-"),
    Dem = as.numeric(Party %in% "Democrat")
  )


## 3. Census population counts for the survey year

## 2017 census rows (these now carry origin and USborn)
census_temp <- filter(census_region2, year == 2017)

## 4. Reconcile the category sets of census and survey

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## The census still has many more categories than the survey.
## Census categories with no survey respondent:
category_check <- filter(census_temp, !(category %in% survey_temp$category))

## Some of them have a very large WeightedN, but nothing in the survey can be
## weighted up to them, so keep only the census categories seen in the survey
census_temp <- filter(census_temp, category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## 6 not shared
## Survey respondents whose category has no census row:
category_check <- filter(survey_temp, !(category %in% census_temp$category))

## Give those categories a census entry carrying the minimum WeightedN
category_merge <- category_check %>%
  select(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe
## It is stored as census_merge (as in the CES 2022/2023 chunks) so that
## census_temp keeps its demographic columns for the Census row of `demog`.

census_merge <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

## Append the survey-only categories created above
census_merge <- rbind(census_merge, category_merge)

categories1 <- tibble(unique(census_merge$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first (design with the provided weights)

svy.oldweight <- svydesign(
  ids = ~id,
  weights = ~Weight,
  strata = ~category,
  data = survey_temp
)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(
  design = svy.oldweight,
  strata = ~category,
  population = census_merge,
  partial = TRUE
)

## Regress provided weights on post-stratified weights to see how far the
## adjustment moved them
lm_df <- tibble(
  oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
  psweight = attr(svy.ps$postStrata[[1]], "weights")
)

lm1 <- lm(oldweight ~ psweight, data = lm_df)

summary(lm1)


## Attach the post-stratified weights to the respondent-level data
survey.ps <- cbind(survey_temp, attr(svy.ps$postStrata[[1]], "weights"))
survey.ps_ces2017 <- survey.ps %>%
  rename(PSweights = `attr(svy.ps$postStrata[[1]], "weights")`)


## Compare weighted demographics: provided weights, post-stratified weights,
## and the census benchmark. The benchmark uses this chunk's year-specific
## census_temp (already restricted to categories present in the survey)
## instead of the global `census`, matching the CES 2022/2023 chunks.
## NOTE(review): assumes census_region2 carries USborn and CollGrad columns
## coded 1 for US-born / college graduate, as those chunks rely on -- confirm.
demog <- data.frame(
  Weights = c("provided", "post-strat", "Census"),
  stringsAsFactors = FALSE
)
demog$USBorn <- c(
  svymean(~USborn, svy.oldweight),
  svymean(~USborn, svy.ps),
  sum(filter(census_temp, USborn == 1)$WeightedN) / sum(census_temp$WeightedN)
)
## No census benchmark for mean age
demog$MeanAge <- unname(c(svymean(~age, svy.oldweight), svymean(~age, svy.ps), NA))
demog$CollGrad <- c(
  svymean(~CollGrad, svy.oldweight),
  svymean(~CollGrad, svy.ps),
  sum(filter(census_temp, CollGrad == 1)$WeightedN) / sum(census_temp$WeightedN)
)
## No census benchmark for party
demog$Dem <- unname(c(svymean(~Dem, svy.oldweight), svymean(~Dem, svy.ps), NA))

print(demog)



```


```{r CCES 2018}

## State/region is stored in "region2"
## 1. Subset `survey` to the 2018 CES respondents

survey_temp <- filter(survey, Year == 2018, Survey %in% "CES")

## Respondents per region
diag_df <- count(survey_temp, region2, name = "count")


## 2. Build the "category" variable (its inputs differ by survey)

## Tabulate each input first; diag_df is overwritten at every step, so
## inspect it interactively

## USborn
diag_df <- count(survey_temp, USborn, name = "count")

## Respondents with missing USborn are dropped before building categories
survey_temp <- filter(survey_temp, !is.na(USborn))

## CollGrad
diag_df <- count(survey_temp, CollGrad, name = "count")

## sex
diag_df <- count(survey_temp, sex, name = "count")

## agegroup
diag_df <- count(survey_temp, agegroup, name = "count")

## origin (by survey)
diag_df <- count(survey_temp, origin, Survey, name = "count")

## region again, now that missing USborn has been dropped
diag_df <- count(survey_temp, region2, name = "count")

## category: region2-USborn-CollGrad-sex-agegroup-origin
## Dem: temporary comparison variable, 1 when Party is "Democrat", else 0
survey_temp <- survey_temp %>%
  mutate(
    category = paste(region2, USborn, CollGrad, sex, agegroup, origin, sep = "-"),
    Dem = as.numeric(Party %in% "Democrat")
  )


## 3. Census population counts for the survey year

## 2018 census rows (these now carry origin and USborn)
census_temp <- filter(census_region2, year == 2018)

## 4. Reconcile the category sets of census and survey

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## The census still has many more categories than the survey.
## Census categories with no survey respondent:
category_check <- filter(census_temp, !(category %in% survey_temp$category))

## Some of them have a very large WeightedN, but nothing in the survey can be
## weighted up to them, so keep only the census categories seen in the survey
census_temp <- filter(census_temp, category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## 9 not shared
## Survey respondents whose category has no census row:
category_check <- filter(survey_temp, !(category %in% census_temp$category))

## Give those categories a census entry carrying the minimum WeightedN
category_merge <- category_check %>%
  select(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe
## It is stored as census_merge (as in the CES 2022/2023 chunks) so that
## census_temp keeps its demographic columns for the Census row of `demog`.

census_merge <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

## Append the survey-only categories created above
census_merge <- rbind(census_merge, category_merge)

categories1 <- tibble(unique(census_merge$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first (design with the provided weights)

svy.oldweight <- svydesign(
  ids = ~id,
  weights = ~Weight,
  strata = ~category,
  data = survey_temp
)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(
  design = svy.oldweight,
  strata = ~category,
  population = census_merge,
  partial = TRUE
)

## Regress provided weights on post-stratified weights to see how far the
## adjustment moved them
lm_df <- tibble(
  oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
  psweight = attr(svy.ps$postStrata[[1]], "weights")
)

lm1 <- lm(oldweight ~ psweight, data = lm_df)

summary(lm1)


## Attach the post-stratified weights to the respondent-level data
survey.ps <- cbind(survey_temp, attr(svy.ps$postStrata[[1]], "weights"))
survey.ps_ces2018 <- survey.ps %>%
  rename(PSweights = `attr(svy.ps$postStrata[[1]], "weights")`)


## Compare weighted demographics: provided weights, post-stratified weights,
## and the census benchmark. The benchmark uses this chunk's year-specific
## census_temp (already restricted to categories present in the survey)
## instead of the global `census`, matching the CES 2022/2023 chunks.
## NOTE(review): assumes census_region2 carries USborn and CollGrad columns
## coded 1 for US-born / college graduate, as those chunks rely on -- confirm.
demog <- data.frame(
  Weights = c("provided", "post-strat", "Census"),
  stringsAsFactors = FALSE
)
demog$USBorn <- c(
  svymean(~USborn, svy.oldweight),
  svymean(~USborn, svy.ps),
  sum(filter(census_temp, USborn == 1)$WeightedN) / sum(census_temp$WeightedN)
)
## No census benchmark for mean age
demog$MeanAge <- unname(c(svymean(~age, svy.oldweight), svymean(~age, svy.ps), NA))
demog$CollGrad <- c(
  svymean(~CollGrad, svy.oldweight),
  svymean(~CollGrad, svy.ps),
  sum(filter(census_temp, CollGrad == 1)$WeightedN) / sum(census_temp$WeightedN)
)
## No census benchmark for party
demog$Dem <- unname(c(svymean(~Dem, svy.oldweight), svymean(~Dem, svy.ps), NA))

print(demog)



```



```{r CES 2019}

## State/region is stored in "region2"
## 1. Subset `survey` to the 2019 CES respondents

survey_temp <- filter(survey, Year == 2019, Survey %in% "CES")

## Respondents per region
diag_df <- count(survey_temp, region2, name = "count")


## 2. Build the "category" variable (its inputs differ by survey)

## Tabulate each input first; diag_df is overwritten at every step, so
## inspect it interactively

## USborn
diag_df <- count(survey_temp, USborn, name = "count")

## Respondents with missing USborn are dropped before building categories
survey_temp <- filter(survey_temp, !is.na(USborn))

## CollGrad
diag_df <- count(survey_temp, CollGrad, name = "count")

## sex
diag_df <- count(survey_temp, sex, name = "count")

## agegroup
diag_df <- count(survey_temp, agegroup, name = "count")

## origin (by survey)
diag_df <- count(survey_temp, origin, Survey, name = "count")

## region again, now that missing USborn has been dropped
diag_df <- count(survey_temp, region2, name = "count")

## category: region2-USborn-CollGrad-sex-agegroup-origin
## Dem: temporary comparison variable, 1 when Party is "Democrat", else 0
survey_temp <- survey_temp %>%
  mutate(
    category = paste(region2, USborn, CollGrad, sex, agegroup, origin, sep = "-"),
    Dem = as.numeric(Party %in% "Democrat")
  )


## 3. Census population counts for the survey year

## 2019 census rows (these now carry origin and USborn)
census_temp <- filter(census_region2, year == 2019)

## 4. Reconcile the category sets of census and survey

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## The census still has many more categories than the survey.
## Census categories with no survey respondent:
category_check <- filter(census_temp, !(category %in% survey_temp$category))

## Some of them have a very large WeightedN, but nothing in the survey can be
## weighted up to them, so keep only the census categories seen in the survey
census_temp <- filter(census_temp, category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## 9 not shared
## Survey respondents whose category has no census row:
category_check <- filter(survey_temp, !(category %in% census_temp$category))

## Give those categories a census entry carrying the minimum WeightedN
category_merge <- category_check %>%
  select(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe
## It is stored as census_merge (as in the CES 2022/2023 chunks) so that
## census_temp keeps its demographic columns for the Census row of `demog`.

census_merge <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

## Append the survey-only categories created above
census_merge <- rbind(census_merge, category_merge)

categories1 <- tibble(unique(census_merge$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first (design with the provided weights)

svy.oldweight <- svydesign(
  ids = ~id,
  weights = ~Weight,
  strata = ~category,
  data = survey_temp
)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(
  design = svy.oldweight,
  strata = ~category,
  population = census_merge,
  partial = TRUE
)

## Regress provided weights on post-stratified weights to see how far the
## adjustment moved them
lm_df <- tibble(
  oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
  psweight = attr(svy.ps$postStrata[[1]], "weights")
)

lm1 <- lm(oldweight ~ psweight, data = lm_df)

summary(lm1)


## Attach the post-stratified weights to the respondent-level data
survey.ps <- cbind(survey_temp, attr(svy.ps$postStrata[[1]], "weights"))
survey.ps_ces2019 <- survey.ps %>%
  rename(PSweights = `attr(svy.ps$postStrata[[1]], "weights")`)


## Compare weighted demographics: provided weights, post-stratified weights,
## and the census benchmark. The benchmark uses this chunk's year-specific
## census_temp (already restricted to categories present in the survey)
## instead of the global `census`, matching the CES 2022/2023 chunks.
## NOTE(review): assumes census_region2 carries USborn and CollGrad columns
## coded 1 for US-born / college graduate, as those chunks rely on -- confirm.
demog <- data.frame(
  Weights = c("provided", "post-strat", "Census"),
  stringsAsFactors = FALSE
)
demog$USBorn <- c(
  svymean(~USborn, svy.oldweight),
  svymean(~USborn, svy.ps),
  sum(filter(census_temp, USborn == 1)$WeightedN) / sum(census_temp$WeightedN)
)
## No census benchmark for mean age
demog$MeanAge <- unname(c(svymean(~age, svy.oldweight), svymean(~age, svy.ps), NA))
demog$CollGrad <- c(
  svymean(~CollGrad, svy.oldweight),
  svymean(~CollGrad, svy.ps),
  sum(filter(census_temp, CollGrad == 1)$WeightedN) / sum(census_temp$WeightedN)
)
## No census benchmark for party
demog$Dem <- unname(c(svymean(~Dem, svy.oldweight), svymean(~Dem, svy.ps), NA))

print(demog)



```




```{r CES 2020}

## State/region is stored in "region2"
## 1. Subset `survey` to the 2020 CES respondents

survey_temp <- filter(survey, Year == 2020, Survey %in% "CES")

## Respondents per region
diag_df <- count(survey_temp, region2, name = "count")


## 2. Build the "category" variable (its inputs differ by survey)

## Tabulate each input first; diag_df is overwritten at every step, so
## inspect it interactively

## USborn
diag_df <- count(survey_temp, USborn, name = "count")

## Respondents with missing USborn are dropped before building categories
survey_temp <- filter(survey_temp, !is.na(USborn))

## CollGrad
diag_df <- count(survey_temp, CollGrad, name = "count")

## sex
diag_df <- count(survey_temp, sex, name = "count")

## agegroup
diag_df <- count(survey_temp, agegroup, name = "count")

## origin (by survey)
diag_df <- count(survey_temp, origin, Survey, name = "count")

## region again, now that missing USborn has been dropped
diag_df <- count(survey_temp, region2, name = "count")

## category: region2-USborn-CollGrad-sex-agegroup-origin
## Dem: temporary comparison variable, 1 when Party is "Democrat", else 0
survey_temp <- survey_temp %>%
  mutate(
    category = paste(region2, USborn, CollGrad, sex, agegroup, origin, sep = "-"),
    Dem = as.numeric(Party %in% "Democrat")
  )


## 3. Census population counts for the survey year

## 2020 census rows (these now carry origin and USborn)
census_temp <- filter(census_region2, year == 2020)

## 4. Reconcile the category sets of census and survey

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## The census still has many more categories than the survey.
## Census categories with no survey respondent:
category_check <- filter(census_temp, !(category %in% survey_temp$category))

## Some of them have a very large WeightedN, but nothing in the survey can be
## weighted up to them, so keep only the census categories seen in the survey
census_temp <- filter(census_temp, category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## 12 not shared
## Survey respondents whose category has no census row:
category_check <- filter(survey_temp, !(category %in% census_temp$category))

## Give those categories a census entry carrying the minimum WeightedN
category_merge <- category_check %>%
  select(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe
## It is stored as census_merge (as in the CES 2022/2023 chunks) so that
## census_temp keeps its demographic columns for the Census row of `demog`.

census_merge <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

## Append the survey-only categories created above
census_merge <- rbind(census_merge, category_merge)

categories1 <- tibble(unique(census_merge$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first (design with the provided weights)

svy.oldweight <- svydesign(
  ids = ~id,
  weights = ~Weight,
  strata = ~category,
  data = survey_temp
)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(
  design = svy.oldweight,
  strata = ~category,
  population = census_merge,
  partial = TRUE
)

## Regress provided weights on post-stratified weights to see how far the
## adjustment moved them
lm_df <- tibble(
  oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
  psweight = attr(svy.ps$postStrata[[1]], "weights")
)

lm1 <- lm(oldweight ~ psweight, data = lm_df)

summary(lm1)


## Attach the post-stratified weights to the respondent-level data
survey.ps <- cbind(survey_temp, attr(svy.ps$postStrata[[1]], "weights"))
survey.ps_ces2020 <- survey.ps %>%
  rename(PSweights = `attr(svy.ps$postStrata[[1]], "weights")`)


## Compare weighted demographics: provided weights, post-stratified weights,
## and the census benchmark. The benchmark uses this chunk's year-specific
## census_temp (already restricted to categories present in the survey)
## instead of the global `census`, matching the CES 2022/2023 chunks.
## NOTE(review): assumes census_region2 carries USborn and CollGrad columns
## coded 1 for US-born / college graduate, as those chunks rely on -- confirm.
demog <- data.frame(
  Weights = c("provided", "post-strat", "Census"),
  stringsAsFactors = FALSE
)
demog$USBorn <- c(
  svymean(~USborn, svy.oldweight),
  svymean(~USborn, svy.ps),
  sum(filter(census_temp, USborn == 1)$WeightedN) / sum(census_temp$WeightedN)
)
## No census benchmark for mean age
demog$MeanAge <- unname(c(svymean(~age, svy.oldweight), svymean(~age, svy.ps), NA))
demog$CollGrad <- c(
  svymean(~CollGrad, svy.oldweight),
  svymean(~CollGrad, svy.ps),
  sum(filter(census_temp, CollGrad == 1)$WeightedN) / sum(census_temp$WeightedN)
)
## No census benchmark for party
demog$Dem <- unname(c(svymean(~Dem, svy.oldweight), svymean(~Dem, svy.ps), NA))

print(demog)



```



```{r CES 2021}

## State/region is stored in "region2"
## 1. Subset `survey` to the 2021 CES respondents

survey_temp <- filter(survey, Year == 2021, Survey %in% "CES")

## Respondents per region
diag_df <- count(survey_temp, region2, name = "count")


## 2. Build the "category" variable (its inputs differ by survey)

## Tabulate each input first; diag_df is overwritten at every step, so
## inspect it interactively

## USborn
diag_df <- count(survey_temp, USborn, name = "count")

## Respondents with missing USborn are dropped before building categories
survey_temp <- filter(survey_temp, !is.na(USborn))

## CollGrad
diag_df <- count(survey_temp, CollGrad, name = "count")

## sex
diag_df <- count(survey_temp, sex, name = "count")

## agegroup
diag_df <- count(survey_temp, agegroup, name = "count")

## origin (by survey)
diag_df <- count(survey_temp, origin, Survey, name = "count")

## region again, now that missing USborn has been dropped
diag_df <- count(survey_temp, region2, name = "count")

## category: region2-USborn-CollGrad-sex-agegroup-origin
## Dem: temporary comparison variable, 1 when Party is "Democrat", else 0
survey_temp <- survey_temp %>%
  mutate(
    category = paste(region2, USborn, CollGrad, sex, agegroup, origin, sep = "-"),
    Dem = as.numeric(Party %in% "Democrat")
  )


## 3. Census population counts for the survey year

## 2021 census rows (these now carry origin and USborn)
census_temp <- filter(census_region2, year == 2021)

## 4. Reconcile the category sets of census and survey

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## The census still has many more categories than the survey.
## Census categories with no survey respondent:
category_check <- filter(census_temp, !(category %in% survey_temp$category))

## Some of them have a very large WeightedN, but nothing in the survey can be
## weighted up to them, so keep only the census categories seen in the survey
census_temp <- filter(census_temp, category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## 9 not shared
## Survey respondents whose category has no census row:
category_check <- filter(survey_temp, !(category %in% census_temp$category))

## Give those categories a census entry carrying the minimum WeightedN
category_merge <- category_check %>%
  select(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe
## It is stored as census_merge (as in the CES 2022/2023 chunks) so that
## census_temp keeps its demographic columns for the Census row of `demog`.

census_merge <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

## Append the survey-only categories created above
census_merge <- rbind(census_merge, category_merge)

categories1 <- tibble(unique(census_merge$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first (design with the provided weights)

svy.oldweight <- svydesign(
  ids = ~id,
  weights = ~Weight,
  strata = ~category,
  data = survey_temp
)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(
  design = svy.oldweight,
  strata = ~category,
  population = census_merge,
  partial = TRUE
)

## Regress provided weights on post-stratified weights to see how far the
## adjustment moved them
lm_df <- tibble(
  oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
  psweight = attr(svy.ps$postStrata[[1]], "weights")
)

lm1 <- lm(oldweight ~ psweight, data = lm_df)

summary(lm1)


## Attach the post-stratified weights to the respondent-level data
survey.ps <- cbind(survey_temp, attr(svy.ps$postStrata[[1]], "weights"))
survey.ps_ces2021 <- survey.ps %>%
  rename(PSweights = `attr(svy.ps$postStrata[[1]], "weights")`)


## Compare weighted demographics: provided weights, post-stratified weights,
## and the census benchmark. The benchmark uses this chunk's year-specific
## census_temp (already restricted to categories present in the survey)
## instead of the global `census`, matching the CES 2022/2023 chunks.
## NOTE(review): assumes census_region2 carries USborn and CollGrad columns
## coded 1 for US-born / college graduate, as those chunks rely on -- confirm.
demog <- data.frame(
  Weights = c("provided", "post-strat", "Census"),
  stringsAsFactors = FALSE
)
demog$USBorn <- c(
  svymean(~USborn, svy.oldweight),
  svymean(~USborn, svy.ps),
  sum(filter(census_temp, USborn == 1)$WeightedN) / sum(census_temp$WeightedN)
)
## No census benchmark for mean age
demog$MeanAge <- unname(c(svymean(~age, svy.oldweight), svymean(~age, svy.ps), NA))
demog$CollGrad <- c(
  svymean(~CollGrad, svy.oldweight),
  svymean(~CollGrad, svy.ps),
  sum(filter(census_temp, CollGrad == 1)$WeightedN) / sum(census_temp$WeightedN)
)
## No census benchmark for party
demog$Dem <- unname(c(svymean(~Dem, svy.oldweight), svymean(~Dem, svy.ps), NA))

print(demog)



```




```{r CES 2022}

## State/region is stored in "region2"
## 1. Subset `survey` to the 2022 CES respondents

survey_temp <- filter(survey, Year == 2022, Survey %in% "CES")

## Respondents per region
diag_df <- count(survey_temp, region2, name = "count")


## 2. Build the "category" variable (its inputs differ by survey)

## Tabulate each input first; diag_df is overwritten at every step, so
## inspect it interactively

## USborn
diag_df <- count(survey_temp, USborn, name = "count")

## Respondents with missing USborn are dropped before building categories
survey_temp <- filter(survey_temp, !is.na(USborn))

## CollGrad
diag_df <- count(survey_temp, CollGrad, name = "count")

## sex
diag_df <- count(survey_temp, sex, name = "count")

## 69 other; drop for now
## (filter() also removes rows where sex is missing)
survey_temp <- filter(survey_temp, sex != "Other")

## agegroup
diag_df <- count(survey_temp, agegroup, name = "count")

## origin (by survey)
diag_df <- count(survey_temp, origin, Survey, name = "count")

## region again, now that the rows above have been dropped
diag_df <- count(survey_temp, region2, name = "count")

## category: region2-USborn-CollGrad-sex-agegroup-origin
## Dem: temporary comparison variable, 1 when Party is "Democrat", else 0
survey_temp <- survey_temp %>%
  mutate(
    category = paste(region2, USborn, CollGrad, sex, agegroup, origin, sep = "-"),
    Dem = as.numeric(Party %in% "Democrat")
  )


## 3. Census population counts for the survey year

## 2022 census rows (these now carry origin and USborn)
census_temp <- filter(census_region2, year == 2022)

## 4. Reconcile the category sets of census and survey

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## The census still has many more categories than the survey.
## Census categories with no survey respondent:
category_check <- filter(census_temp, !(category %in% survey_temp$category))

## Some of them have a very large WeightedN, but nothing in the survey can be
## weighted up to them, so keep only the census categories seen in the survey
census_temp <- filter(census_temp, category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## 2 not shared
## Survey respondents whose category has no census row:
category_check <- filter(survey_temp, !(category %in% census_temp$category))

## Give those categories a census entry carrying the minimum WeightedN
category_merge <- category_check %>%
  select(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe
## (stored as census_merge; census_temp keeps its full set of columns for the
## Census row of `demog` below)

census_merge <- census_temp %>%
  ungroup() %>%
  select(category, WeightedN)

## Append the survey-only categories created above
census_merge <- rbind(census_merge, category_merge)

categories1 <- tibble(unique(census_merge$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first (design with the provided weights)

svy.oldweight <- svydesign(
  ids = ~id,
  weights = ~Weight,
  strata = ~category,
  data = survey_temp
)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(
  design = svy.oldweight,
  strata = ~category,
  population = census_merge,
  partial = TRUE
)

## Regress provided weights on post-stratified weights to see how far the
## adjustment moved them
lm_df <- tibble(
  oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
  psweight = attr(svy.ps$postStrata[[1]], "weights")
)

lm1 <- lm(oldweight ~ psweight, data = lm_df)

summary(lm1)


## Attach the post-stratified weights to the respondent-level data
survey.ps <- cbind(survey_temp, attr(svy.ps$postStrata[[1]], "weights"))
survey.ps_ces2022 <- rename(
  survey.ps,
  PSweights = `attr(svy.ps$postStrata[[1]], "weights")`
)


## Compare weighted demographics: provided weights, post-stratified weights,
## and the census benchmark (census_temp, restricted to categories present in
## the survey)
demog <- data.frame(
  Weights = c("provided", "post-strat", "Census"),
  stringsAsFactors = FALSE
)
demog$USBorn <- c(
  svymean(~USborn, svy.oldweight),
  svymean(~USborn, svy.ps),
  sum(filter(census_temp, USborn == 1)$WeightedN) / sum(census_temp$WeightedN)
)
## No census benchmark for mean age
demog$MeanAge <- unname(c(svymean(~age, svy.oldweight), svymean(~age, svy.ps), NA))
demog$CollGrad <- c(
  svymean(~CollGrad, svy.oldweight),
  svymean(~CollGrad, svy.ps),
  sum(filter(census_temp, CollGrad == 1)$WeightedN) / sum(census_temp$WeightedN)
)
## No census benchmark for party
demog$Dem <- unname(c(svymean(~Dem, svy.oldweight), svymean(~Dem, svy.ps), NA))

print(demog)



```



```{r CES 2023}

## State/region is stored in "region2"
## 1. Subset `survey` to the 2023 CES respondents

survey_temp <- filter(survey, Year == 2023, Survey %in% "CES")

## Respondents per region
diag_df <- count(survey_temp, region2, name = "count")


## 2. Build the "category" variable (its inputs differ by survey)

## Tabulate each input first; diag_df is overwritten at every step, so
## inspect it interactively

## USborn
diag_df <- count(survey_temp, USborn, name = "count")

## Respondents with missing USborn are dropped before building categories
survey_temp <- filter(survey_temp, !is.na(USborn))

## CollGrad
diag_df <- count(survey_temp, CollGrad, name = "count")

## sex
diag_df <- count(survey_temp, sex, name = "count")

## 69 other; drop for now, together with missing sex
survey_temp <- filter(survey_temp, sex != "Other", !is.na(sex))

## agegroup
diag_df <- count(survey_temp, agegroup, name = "count")

## origin (by survey)
diag_df <- count(survey_temp, origin, Survey, name = "count")

## region again, now that the rows above have been dropped
diag_df <- count(survey_temp, region2, name = "count")

## category: region2-USborn-CollGrad-sex-agegroup-origin
## Dem: temporary comparison variable, 1 when Party is "Democrat", else 0
survey_temp <- survey_temp %>%
  mutate(
    category = paste(region2, USborn, CollGrad, sex, agegroup, origin, sep = "-"),
    Dem = as.numeric(Party %in% "Democrat")
  )


## 3. Census population counts for the survey year

## 2023 census rows (these now carry origin and USborn)
census_temp <- filter(census_region2, year == 2023)

## 4. Reconcile the category sets of census and survey

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## The census still has many more categories than the survey.
## Census categories with no survey respondent:
category_check <- filter(census_temp, !(category %in% survey_temp$category))

## Some of them have a very large WeightedN, but nothing in the survey can be
## weighted up to them, so keep only the census categories seen in the survey
census_temp <- filter(census_temp, category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## 2 not shared
## Survey respondents whose category has no census row:
category_check <- filter(survey_temp, !(category %in% census_temp$category))

## Give those categories a census entry carrying the minimum WeightedN
category_merge <- category_check %>%
  select(category) %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe

census_merge <- census_temp %>%
  ungroup%>%
  select(category,
         WeightedN)

census_merge <- rbind(census_merge,
                     category_merge)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first

## Design using the weights shipped with the survey, stratified by category
svy.oldweight <- svydesign(
  ids = ~id,
  strata = ~category,
  weights = ~Weight,
  data = survey_temp
)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(
  design = svy.oldweight,
  strata = ~category,
  population = census_merge,
  partial = TRUE
)

## Compare provided vs post-stratified weights with a simple regression
lm_df <- tibble(
  oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
  psweight = attr(svy.ps$postStrata[[1]], "weights")
)

lm1 <- lm(oldweight ~ psweight, data = lm_df)

summary(lm1)

## Append the post-stratified weights as the last column, then name that
## column PSweights in the per-survey copy
survey.ps <- cbind(survey_temp, attr(svy.ps$postStrata[[1]], "weights"))
survey.ps_ces2023 <- survey.ps
names(survey.ps_ces2023)[ncol(survey.ps_ces2023)] <- "PSweights"

## Side-by-side table: provided weights, post-stratified weights, Census share
demog <- data.frame(
  Weights = c("provided", "post-strat", "Census"),
  stringsAsFactors = FALSE
)
demog$USBorn <- c(
  svymean(~USborn, svy.oldweight),
  svymean(~USborn, svy.ps),
  with(census_temp, sum(WeightedN[which(USborn == 1)]) / sum(WeightedN))
)
## No Census benchmark is computed for mean age, hence the NA
demog$MeanAge <- c(
  as.numeric(svymean(~age, svy.oldweight)),
  as.numeric(svymean(~age, svy.ps)),
  NA
)
demog$CollGrad <- c(
  svymean(~CollGrad, svy.oldweight),
  svymean(~CollGrad, svy.ps),
  with(census_temp, sum(WeightedN[which(CollGrad == 1)]) / sum(WeightedN))
)
## No Census benchmark for party either
demog$Dem <- c(
  as.numeric(svymean(~Dem, svy.oldweight)),
  as.numeric(svymean(~Dem, svy.ps)),
  NA
)

print(demog)




```



# CMPS 16, 20

## CMPS 2008 and 2012 lack PID lean


```{r CMPS 2016}

## State/region is under "region2"
## 1. Load temp survey file

## Pull the CMPS 2016 respondents
survey_temp <- survey %>%
  filter(Year == 2016, Survey %in% "CMPS")

## Diagnostic counts below overwrite diag_df each time; inspect interactively
diag_df <- survey_temp %>%
  group_by(region2) %>%
  summarise(count = n(), .groups = "drop")


## 2. Make "category" variable (different for each survey)

## Check each component of the category in turn

## USBorn: tabulate, then drop rows where it is missing
diag_df <- survey_temp %>%
  group_by(USborn) %>%
  summarise(count = n(), .groups = "drop")

survey_temp <- survey_temp %>%
  filter(!is.na(USborn))

## CollGrad (cross-tabbed against education)
diag_df <- survey_temp %>%
  group_by(CollGrad, education) %>%
  summarise(count = n(), .groups = "drop")

## sex: tabulate, then drop rows where it is missing
diag_df <- survey_temp %>%
  group_by(sex) %>%
  summarise(count = n(), .groups = "drop")

survey_temp <- survey_temp %>%
  filter(!is.na(sex))

## agegroup
diag_df <- survey_temp %>%
  group_by(agegroup) %>%
  summarise(count = n(), .groups = "drop")

## origin, split by survey
diag_df <- survey_temp %>%
  group_by(origin, Survey) %>%
  summarise(count = n(), .groups = "drop")

## region, re-checked after the filters above
diag_df <- survey_temp %>%
  group_by(region2) %>%
  summarise(count = n(), .groups = "drop")


## Build the post-stratification cell label ("category") and a 0/1 "Dem"
## indicator (1 only when Party is exactly "Democrat"; missing Party -> 0)
survey_temp <- survey_temp %>%
  mutate(
    category = paste(region2, USborn, CollGrad, sex, agegroup, origin,
                     sep = "-"),
    Dem = as.numeric(Party %in% "Democrat")
  )


## 3. Load temp census files

## Using 2016 Census

## Build the population table (census_merge: one row per category with its
## WeightedN) that postStratify() uses below.

census_temp <- census_region2 %>%
  filter(year == 2016)

## 4. Checking that categories are shared across; if not, create new

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Census still has many more categories

## First, check to see which ones are not shared:

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Some categories have very large WeightedN but are not present in survey
## We do what we can: restrict Census to just those in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## 2 not shared (author's count from an earlier run; re-check if data change)

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Create new categories in census and assign minimum WeightedN.
## category_check holds one row per respondent, so collapse it to one row per
## category first: the population table should list each stratum only once.

category_merge <- category_check %>%
  select(category) %>%
  distinct() %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe

census_merge <- census_temp %>%
  ungroup() %>%
  select(category,
         WeightedN)

census_merge <- rbind(census_merge,
                      category_merge)

## Diagnostic: categories in the merged population table vs. the survey

categories1 <- tibble(unique(census_merge$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first

## Design using the weights shipped with the survey, stratified by category
svy.oldweight <- svydesign(
  ids = ~id,
  strata = ~category,
  weights = ~Weight,
  data = survey_temp
)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(
  design = svy.oldweight,
  strata = ~category,
  population = census_merge,
  partial = TRUE
)

## Compare provided vs post-stratified weights with a simple regression
lm_df <- tibble(
  oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
  psweight = attr(svy.ps$postStrata[[1]], "weights")
)

lm1 <- lm(oldweight ~ psweight, data = lm_df)

summary(lm1)

## Append the post-stratified weights as the last column, then name that
## column PSweights in the per-survey copy
survey.ps <- cbind(survey_temp, attr(svy.ps$postStrata[[1]], "weights"))
survey.ps_cmps2016 <- survey.ps
names(survey.ps_cmps2016)[ncol(survey.ps_cmps2016)] <- "PSweights"

## Side-by-side table: provided weights, post-stratified weights, Census share
demog <- data.frame(
  Weights = c("provided", "post-strat", "Census"),
  stringsAsFactors = FALSE
)
demog$USBorn <- c(
  svymean(~USborn, svy.oldweight),
  svymean(~USborn, svy.ps),
  with(census_temp, sum(WeightedN[which(USborn == 1)]) / sum(WeightedN))
)
## No Census benchmark is computed for mean age, hence the NA
demog$MeanAge <- c(
  as.numeric(svymean(~age, svy.oldweight)),
  as.numeric(svymean(~age, svy.ps)),
  NA
)
## No Census benchmark for party either
demog$Dem <- c(
  as.numeric(svymean(~Dem, svy.oldweight)),
  as.numeric(svymean(~Dem, svy.ps)),
  NA
)

print(demog)

```

```{r CMPS 2020}


## State/region is under "region2"
## 1. Load temp survey file

## Pull the CMPS 2020 respondents
survey_temp <- survey %>%
  filter(Year == 2020, Survey %in% "CMPS")

## Diagnostic counts below overwrite diag_df each time; inspect interactively
diag_df <- survey_temp %>%
  group_by(region2) %>%
  summarise(count = n(), .groups = "drop")


## 2. Make "category" variable (different for each survey)

## Check each component of the category in turn

## USBorn: tabulate, then drop rows where it is missing
diag_df <- survey_temp %>%
  group_by(USborn) %>%
  summarise(count = n(), .groups = "drop")

survey_temp <- survey_temp %>%
  filter(!is.na(USborn))

## CollGrad (cross-tabbed against education)
diag_df <- survey_temp %>%
  group_by(CollGrad, education) %>%
  summarise(count = n(), .groups = "drop")

## For this survey CollGrad is replaced by the education column
## NOTE(review): presumably education is already coded to match the census
## CollGrad levels here -- confirm against the cross-tab above
survey_temp <- survey_temp %>%
  mutate(CollGrad = education)

## sex: tabulate, then keep only "M" / "F"
diag_df <- survey_temp %>%
  group_by(sex) %>%
  summarise(count = n(), .groups = "drop")

survey_temp <- survey_temp %>%
  filter(sex %in% c("M", "F"))

## agegroup
diag_df <- survey_temp %>%
  group_by(agegroup) %>%
  summarise(count = n(), .groups = "drop")

## origin, split by survey
diag_df <- survey_temp %>%
  group_by(origin, Survey) %>%
  summarise(count = n(), .groups = "drop")

## region, re-checked after the filters above
diag_df <- survey_temp %>%
  group_by(region2) %>%
  summarise(count = n(), .groups = "drop")


## Build the post-stratification cell label ("category") and a 0/1 "Dem"
## indicator (1 only when Party is exactly "Democrat"; missing Party -> 0)
survey_temp <- survey_temp %>%
  mutate(
    category = paste(region2, USborn, CollGrad, sex, agegroup, origin,
                     sep = "-"),
    Dem = as.numeric(Party %in% "Democrat")
  )


## 3. Load temp census files

## Using 2020 Census

## Build the population table (census_merge: one row per category with its
## WeightedN) that postStratify() uses below.

census_temp <- census_region2 %>%
  filter(year == 2020)

## 4. Checking that categories are shared across; if not, create new

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## Census still has many more categories

## First, check to see which ones are not shared:

category_check <- census_temp %>%
  filter(category %nin% survey_temp$category)

## Some categories have very large WeightedN but are not present in survey
## We do what we can: restrict Census to just those in the survey

census_temp <- census_temp %>%
  filter(category %in% survey_temp$category)

categories1 <- tibble(unique(census_temp$category))
categories2 <- tibble(unique(survey_temp$category))

## 28 not shared (author's count from an earlier run; re-check if data change)

category_check <- survey_temp %>%
  filter(category %nin% census_temp$category)

## Create new categories in census and assign minimum WeightedN.
## category_check holds one row per respondent, so collapse it to one row per
## category first: the population table should list each stratum only once.

category_merge <- category_check %>%
  select(category) %>%
  distinct() %>%
  mutate(WeightedN = min(census_temp$WeightedN))

## Make population a smaller dataframe

census_merge <- census_temp %>%
  ungroup() %>%
  select(category,
         WeightedN)

census_merge <- rbind(census_merge,
                      category_merge)

## Diagnostic: categories in the merged population table vs. the survey

categories1 <- tibble(unique(census_merge$category))
categories2 <- tibble(unique(survey_temp$category))


## 5. Calculate original survey first

## Design using the weights shipped with the survey, stratified by category
svy.oldweight <- svydesign(
  ids = ~id,
  strata = ~category,
  weights = ~Weight,
  data = survey_temp
)

## 6. Estimate post-stratified weights

svy.ps <- postStratify(
  design = svy.oldweight,
  strata = ~category,
  population = census_merge,
  partial = TRUE
)

## Compare provided vs post-stratified weights with a simple regression
lm_df <- tibble(
  oldweight = attr(svy.ps$postStrata[[1]], "oldweights"),
  psweight = attr(svy.ps$postStrata[[1]], "weights")
)

lm1 <- lm(oldweight ~ psweight, data = lm_df)

summary(lm1)

## Append the post-stratified weights as the last column, then name that
## column PSweights in the per-survey copy
survey.ps <- cbind(survey_temp, attr(svy.ps$postStrata[[1]], "weights"))
survey.ps_cmps2020 <- survey.ps
names(survey.ps_cmps2020)[ncol(survey.ps_cmps2020)] <- "PSweights"

## Side-by-side table: provided weights, post-stratified weights, Census share
demog <- data.frame(
  Weights = c("provided", "post-strat", "Census"),
  stringsAsFactors = FALSE
)
demog$USBorn <- c(
  svymean(~USborn, svy.oldweight),
  svymean(~USborn, svy.ps),
  with(census_temp, sum(WeightedN[which(USborn == 1)]) / sum(WeightedN))
)
## No Census benchmark is computed for mean age, hence the NA
demog$MeanAge <- c(
  as.numeric(svymean(~age, svy.oldweight)),
  as.numeric(svymean(~age, svy.ps)),
  NA
)
## No Census benchmark for party either
demog$Dem <- c(
  as.numeric(svymean(~Dem, svy.oldweight)),
  as.numeric(svymean(~Dem, svy.ps)),
  NA
)

print(demog)



```


```{r Final merge}

## Stack every per-survey data frame carrying PSweights into one table.
## rbind() is kept (rather than a more permissive binder) so that a survey
## whose columns differ from the rest still fails loudly here.
survey.ps_merged <- do.call(
  rbind,
  list(
    ## CCES / CES
    survey.ps_cces2006,
    survey.ps_ces2008, survey.ps_ces2009, survey.ps_ces2010,
    survey.ps_ces2011, survey.ps_ces2012, survey.ps_ces2013,
    survey.ps_ces2014, survey.ps_ces2015, survey.ps_ces2016,
    survey.ps_ces2017, survey.ps_ces2018, survey.ps_ces2019,
    survey.ps_ces2020, survey.ps_ces2021, survey.ps_ces2022,
    survey.ps_ces2023,
    ## CMPS
    survey.ps_cmps2016, survey.ps_cmps2020,
    ## Kaiser, LNPS, LNS
    survey.ps_kaiser1999, survey.ps_kaiserpew2002,
    survey.ps_lnps1989, survey.ps_lns2006,
    ## Pew
    survey.ps_pew2004, survey.ps_pew2006, survey.ps_pew2007,
    survey.ps_pew2008, survey.ps_pew2009, survey.ps_pew2010,
    survey.ps_pew2011, survey.ps_pew2012, survey.ps_pew2013,
    survey.ps_pew2014, survey.ps_pew2015, survey.ps_pew2016,
    survey.ps_pew2018
  )
)

## Write the combined table to disk
write_csv(survey.ps_merged, "Data/survey.ps_merged.csv")

```




